[v6,05/10] dts: add node memory setup

Message ID 20230303102507.527790-6-juraj.linkes@pantheon.tech (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series dts: add hello world test case |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Juraj Linkeš March 3, 2023, 10:25 a.m. UTC
Set up hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes which use TGs that utilize hugepages.

The setup is opt-in, i.e. users need to supply hugepage configuration to
instruct DTS to configure them. If not configured, hugepage
configuration will be skipped. This is helpful if users don't want DTS
to tamper with hugepages on their system.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 dts/conf.yaml                                 |  3 +
 dts/framework/config/__init__.py              | 14 ++++
 dts/framework/config/conf_yaml_schema.json    | 21 +++++
 dts/framework/remote_session/linux_session.py | 78 +++++++++++++++++++
 dts/framework/remote_session/os_session.py    |  8 ++
 dts/framework/testbed_model/node.py           | 12 +++
 6 files changed, 136 insertions(+)
  

Patch

diff --git a/dts/conf.yaml b/dts/conf.yaml
index 1648e5c3c5..6540a45ef7 100644
--- a/dts/conf.yaml
+++ b/dts/conf.yaml
@@ -18,3 +18,6 @@  nodes:
     lcores: ""
     use_first_core: false
     memory_channels: 4
+    hugepages:  # optional; if removed, will use system hugepage configuration
+        amount: 256
+        force_first_numa: false
diff --git a/dts/framework/config/__init__.py b/dts/framework/config/__init__.py
index 17b917f3b3..0e5f493c5d 100644
--- a/dts/framework/config/__init__.py
+++ b/dts/framework/config/__init__.py
@@ -66,6 +66,12 @@  class Compiler(StrEnum):
 #
 # Frozen makes the object immutable. This enables further optimizations,
 # and makes it thread safe should we every want to move in that direction.
+@dataclass(slots=True, frozen=True)
+class HugepageConfiguration:
+    amount: int
+    force_first_numa: bool
+
+
 @dataclass(slots=True, frozen=True)
 class NodeConfiguration:
     name: str
@@ -77,9 +83,16 @@  class NodeConfiguration:
     lcores: str
     use_first_core: bool
     memory_channels: int
+    hugepages: HugepageConfiguration | None
 
     @staticmethod
     def from_dict(d: dict) -> "NodeConfiguration":
+        hugepage_config = d.get("hugepages")
+        if hugepage_config:
+            if "force_first_numa" not in hugepage_config:
+                hugepage_config["force_first_numa"] = False
+            hugepage_config = HugepageConfiguration(**hugepage_config)
+
         return NodeConfiguration(
             name=d["name"],
             hostname=d["hostname"],
@@ -90,6 +103,7 @@  def from_dict(d: dict) -> "NodeConfiguration":
             lcores=d.get("lcores", "1"),
             use_first_core=d.get("use_first_core", False),
             memory_channels=d.get("memory_channels", 1),
+            hugepages=hugepage_config,
         )
 
 
diff --git a/dts/framework/config/conf_yaml_schema.json b/dts/framework/config/conf_yaml_schema.json
index 334b4bd8ab..56f93def36 100644
--- a/dts/framework/config/conf_yaml_schema.json
+++ b/dts/framework/config/conf_yaml_schema.json
@@ -75,6 +75,24 @@ 
         "cpu",
         "compiler"
       ]
+    },
+    "hugepages": {
+      "type": "object",
+      "description": "Optional hugepage configuration. If not specified, hugepages won't be configured and DTS will use system configuration.",
+      "properties": {
+        "amount": {
+          "type": "integer",
+          "description": "The amount of hugepages to configure. Hugepage size will be the system default."
+        },
+        "force_first_numa": {
+          "type": "boolean",
+          "description": "Set to True to force configuring hugepages on the first NUMA node. Defaults to False."
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "amount"
+      ]
     }
   },
   "type": "object",
@@ -118,6 +136,9 @@ 
           "memory_channels": {
             "type": "integer",
             "description": "How many memory channels to use. Optional, defaults to 1."
+          },
+          "hugepages": {
+            "$ref": "#/definitions/hugepages"
           }
         },
         "additionalProperties": false,
diff --git a/dts/framework/remote_session/linux_session.py b/dts/framework/remote_session/linux_session.py
index c49b6bb1d7..a1e3bc3a92 100644
--- a/dts/framework/remote_session/linux_session.py
+++ b/dts/framework/remote_session/linux_session.py
@@ -2,7 +2,9 @@ 
 # Copyright(c) 2023 PANTHEON.tech s.r.o.
 # Copyright(c) 2023 University of New Hampshire
 
+from framework.exception import RemoteCommandExecutionError
 from framework.testbed_model import LogicalCore
+from framework.utils import expand_range
 
 from .posix_session import PosixSession
 
@@ -27,3 +29,79 @@  def get_remote_cpus(self, use_first_core: bool) -> list[LogicalCore]:
 
     def get_dpdk_file_prefix(self, dpdk_prefix) -> str:
         return dpdk_prefix
+
+    def setup_hugepages(self, hugepage_amount: int, force_first_numa: bool) -> None:
+        self._logger.info("Getting Hugepage information.")
+        hugepage_size = self._get_hugepage_size()
+        hugepages_total = self._get_hugepages_total()
+        self._numa_nodes = self._get_numa_nodes()
+
+        if force_first_numa or hugepages_total != hugepage_amount:
+            # when forcing numa, we need to clear existing hugepages regardless
+            # of size, so they can be moved to the first numa node
+            self._configure_huge_pages(hugepage_amount, hugepage_size, force_first_numa)
+        else:
+            self._logger.info("Hugepages already configured.")
+        self._mount_huge_pages()
+
+    def _get_hugepage_size(self) -> int:
+        hugepage_size = self.remote_session.send_command(
+            "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
+        ).stdout
+        return int(hugepage_size)
+
+    def _get_hugepages_total(self) -> int:
+        hugepages_total = self.remote_session.send_command(
+            "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
+        ).stdout
+        return int(hugepages_total)
+
+    def _get_numa_nodes(self) -> list[int]:
+        try:
+            numa_count = self.remote_session.send_command(
+                "cat /sys/devices/system/node/online", verify=True
+            ).stdout
+            numa_range = expand_range(numa_count)
+        except RemoteCommandExecutionError:
+            # the file doesn't exist, meaning the node doesn't support numa
+            numa_range = []
+        return numa_range
+
+    def _mount_huge_pages(self) -> None:
+        self._logger.info("Re-mounting Hugepages.")
+        hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
+        self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
+        result = self.remote_session.send_command(hugapge_fs_cmd)
+        if result.stdout == "":
+            remote_mount_path = "/mnt/huge"
+            self.remote_session.send_command(f"mkdir -p {remote_mount_path}")
+            self.remote_session.send_command(
+                f"mount -t hugetlbfs nodev {remote_mount_path}"
+            )
+
+    def _supports_numa(self) -> bool:
+        # the system supports numa if self._numa_nodes is non-empty and there are more
+        # than one numa node (in the latter case it may actually support numa, but
+        # there's no reason to do any numa specific configuration)
+        return len(self._numa_nodes) > 1
+
+    def _configure_huge_pages(
+        self, amount: int, size: int, force_first_numa: bool
+    ) -> None:
+        self._logger.info("Configuring Hugepages.")
+        hugepage_config_path = (
+            f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
+        )
+        if force_first_numa and self._supports_numa():
+            # clear non-numa hugepages
+            self.remote_session.send_command(
+                f"echo 0 | sudo tee {hugepage_config_path}"
+            )
+            hugepage_config_path = (
+                f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
+                f"/hugepages-{size}kB/nr_hugepages"
+            )
+
+        self.remote_session.send_command(
+            f"echo {amount} | sudo tee {hugepage_config_path}"
+        )
diff --git a/dts/framework/remote_session/os_session.py b/dts/framework/remote_session/os_session.py
index 0a42f40a86..048bf7178e 100644
--- a/dts/framework/remote_session/os_session.py
+++ b/dts/framework/remote_session/os_session.py
@@ -151,3 +151,11 @@  def get_dpdk_file_prefix(self, dpdk_prefix) -> str:
         """
         Get the DPDK file prefix that will be used when running DPDK apps.
         """
+
+    @abstractmethod
+    def setup_hugepages(self, hugepage_amount: int, force_first_numa: bool) -> None:
+        """
+        Get the node's Hugepage Size, configure the specified amount of hugepages
+        if needed and mount the hugepages if needed.
+        If force_first_numa is True, configure hugepages just on the first socket.
+        """
diff --git a/dts/framework/testbed_model/node.py b/dts/framework/testbed_model/node.py
index f63b755801..d48fafe65d 100644
--- a/dts/framework/testbed_model/node.py
+++ b/dts/framework/testbed_model/node.py
@@ -62,6 +62,7 @@  def set_up_execution(self, execution_config: ExecutionConfiguration) -> None:
         Perform the execution setup that will be done for each execution
         this node is part of.
         """
+        self._setup_hugepages()
         self._set_up_execution(execution_config)
 
     def _set_up_execution(self, execution_config: ExecutionConfiguration) -> None:
@@ -154,6 +155,17 @@  def _get_remote_cpus(self) -> None:
         self._logger.info("Getting CPU information.")
         self.lcores = self.main_session.get_remote_cpus(self.config.use_first_core)
 
+    def _setup_hugepages(self):
+        """
+        Setup hugepages on the Node. Different architectures can supply different
+        amounts of memory for hugepages and numa-based hugepage allocation may need
+        to be considered.
+        """
+        if self.config.hugepages:
+            self.main_session.setup_hugepages(
+                self.config.hugepages.amount, self.config.hugepages.force_first_numa
+            )
+
     def close(self) -> None:
         """
         Close all connections and free other resources.