[v3,05/10] dts: add node memory setup

Message ID 20230117154906.860916-6-juraj.linkes@pantheon.tech (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series dts: add hello world testcase |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Juraj Linkeš Jan. 17, 2023, 3:49 p.m. UTC
  Set up hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes running TGs that utilize hugepages.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 dts/framework/config/__init__.py              | 16 ++++
 dts/framework/config/arch.py                  | 57 +++++++++++++
 dts/framework/remote_session/linux_session.py | 85 +++++++++++++++++++
 dts/framework/remote_session/os_session.py    | 10 +++
 dts/framework/testbed_model/node.py           | 15 ++++
 5 files changed, 183 insertions(+)
 create mode 100644 dts/framework/config/arch.py
  

Patch

diff --git a/dts/framework/config/__init__.py b/dts/framework/config/__init__.py
index 17b917f3b3..ce6e709c6f 100644
--- a/dts/framework/config/__init__.py
+++ b/dts/framework/config/__init__.py
@@ -19,6 +19,8 @@ 
 
 from framework.settings import SETTINGS
 
+from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
+
 
 class StrEnum(Enum):
     @staticmethod
@@ -176,3 +178,17 @@  def load_config() -> Configuration:
 
 
 CONFIGURATION = load_config()
+
+
+def create_arch(node_config: NodeConfiguration) -> Arch:
+    match node_config.arch:
+        case Architecture.x86_64:
+            return x86_64()
+        case Architecture.x86_32:
+            return x86_32()
+        case Architecture.i686:
+            return i686()
+        case Architecture.ppc64le:
+            return PPC64()
+        case Architecture.arm64:
+            return Arm64()
diff --git a/dts/framework/config/arch.py b/dts/framework/config/arch.py
new file mode 100644
index 0000000000..a226b9a6a9
--- /dev/null
+++ b/dts/framework/config/arch.py
@@ -0,0 +1,57 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 PANTHEON.tech s.r.o.
+
+
+class Arch(object):
+    """
+    Stores architecture-specific information.
+    """
+
+    @property
+    def default_hugepage_memory(self) -> int:
+        """
+        Return the default amount of memory (in kB) for the hugepages DPDK will use.
+        The default is an amount equal to 256 2MB hugepages (512MB memory).
+        """
+        return 256 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        """
+        An architecture may need to force configuration of hugepages to first socket.
+        """
+        return False
+
+
+class x86_64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 4096 * 2048
+
+
+class x86_32(Arch):
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class i686(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class PPC64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+
+class Arm64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 2048 * 2048
diff --git a/dts/framework/remote_session/linux_session.py b/dts/framework/remote_session/linux_session.py
index 6809102038..4dc52132d3 100644
--- a/dts/framework/remote_session/linux_session.py
+++ b/dts/framework/remote_session/linux_session.py
@@ -2,7 +2,9 @@ 
 # Copyright(c) 2023 PANTHEON.tech s.r.o.
 # Copyright(c) 2023 University of New Hampshire
 
+from framework.exception import RemoteCommandExecutionError
 from framework.testbed_model import LogicalCore
+from framework.utils import expand_range
 
 from .posix_session import PosixSession
 
@@ -27,3 +29,86 @@  def get_remote_cpus(self, use_first_core: bool) -> list[LogicalCore]:
 
     def get_dpdk_file_prefix(self, dpdk_prefix) -> str:
         return dpdk_prefix
+
+    def setup_hugepages(
+        self, hugepage_amount: int, force_first_numa: bool = False
+    ) -> None:
+        self._logger.info("Getting Hugepage information.")
+        hugepage_size = self._get_hugepage_size()
+        hugepages_total = self._get_hugepages_total()
+        self._numa_nodes = self._get_numa_nodes()
+
+        target_hugepages_total = int(hugepage_amount / hugepage_size)
+        if hugepage_amount % hugepage_size:
+            target_hugepages_total += 1
+        if force_first_numa or hugepages_total != target_hugepages_total:
+            # when forcing numa, we need to clear existing hugepages regardless
+            # of size, so they can be moved to the first numa node
+            self._configure_huge_pages(
+                target_hugepages_total, hugepage_size, force_first_numa
+            )
+        else:
+            self._logger.info("Hugepages already configured.")
+        self._mount_huge_pages()
+
+    def _get_hugepage_size(self) -> int:
+        hugepage_size = self.remote_session.send_command(
+            "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
+        ).stdout
+        return int(hugepage_size)
+
+    def _get_hugepages_total(self) -> int:
+        hugepages_total = self.remote_session.send_command(
+            "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
+        ).stdout
+        return int(hugepages_total)
+
+    def _get_numa_nodes(self) -> list[int]:
+        try:
+            numa_count = self.remote_session.send_command(
+                "cat /sys/devices/system/node/online", verify=True
+            ).stdout
+            numa_range = expand_range(numa_count)
+        except RemoteCommandExecutionError:
+            # the file doesn't exist, meaning the node doesn't support numa
+            numa_range = []
+        return numa_range
+
+    def _mount_huge_pages(self) -> None:
+        self._logger.info("Re-mounting Hugepages.")
+        hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
+        self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
+        result = self.remote_session.send_command(hugapge_fs_cmd)
+        if result.stdout == "":
+            remote_mount_path = "/mnt/huge"
+            self.remote_session.send_command(f"mkdir -p {remote_mount_path}")
+            self.remote_session.send_command(
+                f"mount -t hugetlbfs nodev {remote_mount_path}"
+            )
+
+    def _supports_numa(self) -> bool:
+        # numa is considered supported only when self._numa_nodes holds more than one
+        # node (a system with a single numa node may actually support numa, but
+        # there's no reason to do any numa specific configuration)
+        return len(self._numa_nodes) > 1
+
+    def _configure_huge_pages(
+        self, amount: int, size: int, force_first_numa: bool
+    ) -> None:
+        self._logger.info("Configuring Hugepages.")
+        hugepage_config_path = (
+            f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
+        )
+        if force_first_numa and self._supports_numa():
+            # clear non-numa hugepages
+            self.remote_session.send_command(
+                f"echo 0 | sudo tee {hugepage_config_path}"
+            )
+            hugepage_config_path = (
+                f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
+                f"/hugepages-{size}kB/nr_hugepages"
+            )
+
+        self.remote_session.send_command(
+            f"echo {amount} | sudo tee {hugepage_config_path}"
+        )
diff --git a/dts/framework/remote_session/os_session.py b/dts/framework/remote_session/os_session.py
index c30753e0b8..966b7f76d5 100644
--- a/dts/framework/remote_session/os_session.py
+++ b/dts/framework/remote_session/os_session.py
@@ -151,3 +151,13 @@  def get_dpdk_file_prefix(self, dpdk_prefix) -> str:
         """
         Get the DPDK file prefix that will be used when running DPDK apps.
         """
+
+    @abstractmethod
+    def setup_hugepages(
+        self, hugepage_amount: int, force_first_numa: bool = False
+    ) -> None:
+        """
+        Get the node's Hugepage Size, configure enough hugepages to provide the
+        specified amount of memory if needed and mount the hugepages if needed.
+        If force_first_numa is True, configure hugepages just on the first socket.
+        """
diff --git a/dts/framework/testbed_model/node.py b/dts/framework/testbed_model/node.py
index cf2af2ca72..d22bf3b7d2 100644
--- a/dts/framework/testbed_model/node.py
+++ b/dts/framework/testbed_model/node.py
@@ -8,9 +8,11 @@ 
 """
 
 from framework.config import (
+    Arch,
     BuildTargetConfiguration,
     ExecutionConfiguration,
     NodeConfiguration,
+    create_arch,
 )
 from framework.logger import DTSLOG, getLogger
 from framework.remote_session import OSSession, create_session
@@ -37,6 +39,7 @@  class Node(object):
     lcores: list[LogicalCore]
     _logger: DTSLOG
     _other_sessions: list[OSSession]
+    _arch: Arch
 
     def __init__(self, node_config: NodeConfiguration):
         self.config = node_config
@@ -51,6 +54,7 @@  def __init__(self, node_config: NodeConfiguration):
         ).filter()
 
         self._other_sessions = []
+        self._arch = create_arch(self.config)
 
         self._logger.info(f"Created node: {self.name}")
 
@@ -59,6 +63,7 @@  def set_up_execution(self, execution_config: ExecutionConfiguration) -> None:
         Perform the execution setup that will be done for each execution
         this node is part of.
         """
+        self._setup_hugepages()
         self._set_up_execution(execution_config)
 
     def _set_up_execution(self, execution_config: ExecutionConfiguration) -> None:
@@ -153,6 +158,16 @@  def _get_remote_cpus(self) -> None:
         self._logger.info("Getting CPU information.")
         self.lcores = self.main_session.get_remote_cpus(self.config.use_first_core)
 
+    def _setup_hugepages(self):
+        """
+        Set up hugepages on the Node. Different architectures can supply different
+        amounts of memory for hugepages and numa-based hugepage allocation may need
+        to be considered.
+        """
+        self.main_session.setup_hugepages(
+            self._arch.default_hugepage_memory, self._arch.hugepage_force_first_numa
+        )
+
     def close(self) -> None:
         """
         Close all connections and free other resources.