[RFC,v2,05/10] dts: add node memory setup

Message ID 20221114165438.1133783-6-juraj.linkes@pantheon.tech (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series dts: add hello world testcase |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Juraj Linkeš Nov. 14, 2022, 4:54 p.m. UTC
Setup hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes which use TGs that utilize hugepages.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 dts/framework/remote_session/__init__.py      |  1 +
 dts/framework/remote_session/arch/__init__.py | 20 +++++
 dts/framework/remote_session/arch/arch.py     | 57 +++++++++++++
 .../remote_session/os/linux_session.py        | 85 +++++++++++++++++++
 dts/framework/remote_session/os/os_session.py | 10 +++
 dts/framework/testbed_model/node/node.py      | 15 +++-
 6 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 dts/framework/remote_session/arch/__init__.py
 create mode 100644 dts/framework/remote_session/arch/arch.py
  

Comments

Owen Hilyard Nov. 16, 2022, 1:47 p.m. UTC | #1
On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech>
wrote:

> Setup hugepages on nodes. This is useful not only on SUT nodes, but
> also on TG nodes which use TGs that utilize hugepages.
>
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
>  dts/framework/remote_session/__init__.py      |  1 +
>  dts/framework/remote_session/arch/__init__.py | 20 +++++
>  dts/framework/remote_session/arch/arch.py     | 57 +++++++++++++
>  .../remote_session/os/linux_session.py        | 85 +++++++++++++++++++
>  dts/framework/remote_session/os/os_session.py | 10 +++
>  dts/framework/testbed_model/node/node.py      | 15 +++-
>  6 files changed, 187 insertions(+), 1 deletion(-)
>  create mode 100644 dts/framework/remote_session/arch/__init__.py
>  create mode 100644 dts/framework/remote_session/arch/arch.py
>
> diff --git a/dts/framework/remote_session/__init__.py
> b/dts/framework/remote_session/__init__.py
> index f2339b20bd..f0deeadac6 100644
> --- a/dts/framework/remote_session/__init__.py
> +++ b/dts/framework/remote_session/__init__.py
> @@ -11,4 +11,5 @@
>
>  # pylama:ignore=W0611
>
> +from .arch import Arch, create_arch
>  from .os import OSSession, create_session
> diff --git a/dts/framework/remote_session/arch/__init__.py
> b/dts/framework/remote_session/arch/__init__.py
> new file mode 100644
> index 0000000000..d78ad42ac5
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/__init__.py
> @@ -0,0 +1,20 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +from framework.config import Architecture, NodeConfiguration
> +
> +from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
> +
> +
> +def create_arch(node_config: NodeConfiguration) -> Arch:
> +    match node_config.arch:
> +        case Architecture.x86_64:
> +            return x86_64()
> +        case Architecture.x86_32:
> +            return x86_32()
> +        case Architecture.i686:
> +            return i686()
> +        case Architecture.ppc64le:
> +            return PPC64()
> +        case Architecture.arm64:
> +            return Arm64()
> diff --git a/dts/framework/remote_session/arch/arch.py
> b/dts/framework/remote_session/arch/arch.py
> new file mode 100644
> index 0000000000..05c7602def
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/arch.py
> @@ -0,0 +1,57 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +
> +class Arch(object):
> +    """
> +    Stores architecture-specific information.
> +    """
> +
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        """
> +        Return the default amount of memory allocated for hugepages DPDK
> will use.
> +        The default is an amount equal to 256 2MB hugepages (512MB
> memory).
> +        """
> +        return 256 * 2048
> +
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        """
> +        An architecture may need to force configuration of hugepages to
> first socket.
> +        """
> +        return False
> +
> +
> +class x86_64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 4096 * 2048
> +
> +
> +class x86_32(Arch):
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        return True
> +
> +
> +class i686(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 512 * 2048
> +
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        return True
> +
> +
> +class PPC64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 512 * 2048
> +
> +
> +class Arm64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 2048 * 2048
> diff --git a/dts/framework/remote_session/os/linux_session.py
> b/dts/framework/remote_session/os/linux_session.py
> index 21f117b714..fad33d7613 100644
> --- a/dts/framework/remote_session/os/linux_session.py
> +++ b/dts/framework/remote_session/os/linux_session.py
> @@ -3,6 +3,8 @@
>  # Copyright(c) 2022 University of New Hampshire
>
>  from framework.config import CPU
> +from framework.exception import RemoteCommandExecutionError
> +from framework.utils import expand_range
>
>  from .posix_session import PosixSession
>
> @@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) ->
> list[CPU]:
>                  continue
>              cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
>          return cpus
> +
> +    def setup_hugepages(
> +        self, hugepage_amount: int = -1, force_first_numa: bool = False
>

I think that `hugepage_amount: int | None = None` is better: it expresses
that the argument is optional, and the type checker will force anyone using
the value to check whether it is None, which will not happen with -1.


> +    ) -> None:
> +        self.logger.info("Getting Hugepage information.")
> +        hugepage_size = self._get_hugepage_size()
> +        hugepages_total = self._get_hugepages_total()
> +        self._numa_nodes = self._get_numa_nodes()
> +
> +        target_hugepages_total = int(hugepage_amount / hugepage_size)
> +        if hugepage_amount % hugepage_size:
> +            target_hugepages_total += 1
> +        if force_first_numa or hugepages_total != target_hugepages_total:
> +            # when forcing numa, we need to clear existing hugepages
> regardless
> +            # of size, so they can be moved to the first numa node
> +            self._configure_huge_pages(
> +                target_hugepages_total, hugepage_size, force_first_numa
> +            )
> +        else:
> +            self.logger.info("Hugepages already configured.")
> +        self._mount_huge_pages()
> +
> +    def _get_hugepage_size(self) -> int:
> +        hugepage_size = self.remote_session.send_command(
> +            "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
> +        ).stdout
> +        return int(hugepage_size)
> +
> +    def _get_hugepages_total(self) -> int:
> +        hugepages_total = self.remote_session.send_command(
> +            "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> +        ).stdout
> +        return int(hugepages_total)
> +
> +    def _get_numa_nodes(self) -> list[int]:
> +        try:
> +            numa_range = self.remote_session.send_command(
> +                "cat /sys/devices/system/node/online", verify=True
> +            ).stdout
> +            numa_range = expand_range(numa_range)
> +        except RemoteCommandExecutionError:
> +            # the file doesn't exist, meaning the node doesn't support
> numa
> +            numa_range = []
> +        return numa_range
> +
> +    def _mount_huge_pages(self) -> None:
> +        self.logger.info("Re-mounting Hugepages.")
> +        hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
> +        self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
> +        result = self.remote_session.send_command(hugapge_fs_cmd)
> +        if result.stdout == "":
> +            remote_mount_path = "/mnt/huge"
> +            self.remote_session.send_command(f"mkdir -p
> {remote_mount_path}")
> +            self.remote_session.send_command(
> +                f"mount -t hugetlbfs nodev {remote_mount_path}"
> +            )
> +
> +    def _supports_numa(self) -> bool:
> +        # the system supports numa if self._numa_nodes is non-empty and
> there are more
> +        # than one numa node (in the latter case it may actually support
> numa, but
> +        # there's no reason to do any numa specific configuration)
> +        return len(self._numa_nodes) > 1
> +
> +    def _configure_huge_pages(
> +        self, amount: int, size: int, force_first_numa: bool
> +    ) -> None:
> +        self.logger.info("Configuring Hugepages.")
> +        hugepage_config_path = (
> +            f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
> +        )
> +        if force_first_numa and self._supports_numa():
> +            # clear non-numa hugepages
> +            self.remote_session.send_command(
> +                f"echo 0 | sudo tee {hugepage_config_path}"
> +            )
> +            hugepage_config_path = (
> +
> f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
> +                f"/hugepages-{size}kB/nr_hugepages"
> +            )
> +
> +        self.remote_session.send_command(
> +            f"echo {amount} | sudo tee {hugepage_config_path}"
> +        )
> diff --git a/dts/framework/remote_session/os/os_session.py
> b/dts/framework/remote_session/os/os_session.py
> index 6f6b6a979e..f84f3ce63c 100644
> --- a/dts/framework/remote_session/os/os_session.py
> +++ b/dts/framework/remote_session/os/os_session.py
> @@ -144,3 +144,13 @@ def kill_cleanup_dpdk_apps(self, dpdk_prefix_list:
> Iterable[str]) -> None:
>          Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
>          dpdk_prefix_list is empty, attempt to find running DPDK apps to
> kill and clean.
>          """
> +
> +    @abstractmethod
> +    def setup_hugepages(
> +        self, hugepage_amount: int = -1, force_first_numa: bool = False
> +    ) -> None:
> +        """
> +        Get the node's Hugepage Size, configure the specified amount of
> hugepages
> +        if needed and mount the hugepages if needed.
> +        If force_first_numa is True, configure hugepages just on the
> first socket.
> +        """
> diff --git a/dts/framework/testbed_model/node/node.py
> b/dts/framework/testbed_model/node/node.py
> index 5ee7023335..96a1724f4c 100644
> --- a/dts/framework/testbed_model/node/node.py
> +++ b/dts/framework/testbed_model/node/node.py
> @@ -16,7 +16,7 @@
>  )
>  from framework.exception import NodeCleanupError, NodeSetupError,
> convert_exception
>  from framework.logger import DTSLOG, getLogger
> -from framework.remote_session import OSSession, create_session
> +from framework.remote_session import Arch, OSSession, create_arch,
> create_session
>  from framework.testbed_model.hw import CPUAmount, cpu_filter
>
>
> @@ -33,6 +33,7 @@ class Node(object):
>      config: NodeConfiguration
>      cpus: list[CPU]
>      _other_sessions: list[OSSession]
> +    _arch: Arch
>
>      def __init__(self, node_config: NodeConfiguration):
>          self.config = node_config
> @@ -42,6 +43,7 @@ def __init__(self, node_config: NodeConfiguration):
>          self.logger = getLogger(self.name)
>          self.logger.info(f"Created node: {self.name}")
>          self.main_session = create_session(self.config, self.name,
> self.logger)
> +        self._arch = create_arch(self.config)
>          self._get_remote_cpus()
>
>      @convert_exception(NodeSetupError)
> @@ -50,6 +52,7 @@ def setup_execution(self, execution_config:
> ExecutionConfiguration) -> None:
>          Perform the execution setup that will be done for each execution
>          this node is part of.
>          """
> +        self._setup_hugepages()
>          self._setup_execution(execution_config)
>
>      def _setup_execution(self, execution_config: ExecutionConfiguration)
> -> None:
> @@ -145,6 +148,16 @@ def _get_remote_cpus(self) -> None:
>          self.logger.info("Getting CPU information.")
>          self.cpus =
> self.main_session.get_remote_cpus(self.config.bypass_core0)
>
> +    def _setup_hugepages(self):
> +        """
> +        Setup hugepages on the Node. Different architectures can supply
> different
> +        amounts of memory for hugepages and numa-based hugepage
> allocation may need
> +        to be considered.
> +        """
> +        self.main_session.setup_hugepages(
> +            self._arch.default_hugepage_memory,
> self._arch.hugepage_force_first_numa
> +        )
> +
>      def close(self) -> None:
>          """
>          Close all connections and free other resources.
> --
> 2.30.2
>
>
On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech>
wrote:

> Setup hugepages on nodes. This is useful not only on SUT nodes, but
> also on TG nodes which use TGs that utilize hugepages.
>
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
>  dts/framework/remote_session/__init__.py      |  1 +
>  dts/framework/remote_session/arch/__init__.py | 20 +++++
>  dts/framework/remote_session/arch/arch.py     | 57 +++++++++++++
>  .../remote_session/os/linux_session.py        | 85 +++++++++++++++++++
>  dts/framework/remote_session/os/os_session.py | 10 +++
>  dts/framework/testbed_model/node/node.py      | 15 +++-
>  6 files changed, 187 insertions(+), 1 deletion(-)
>  create mode 100644 dts/framework/remote_session/arch/__init__.py
>  create mode 100644 dts/framework/remote_session/arch/arch.py
>
> diff --git a/dts/framework/remote_session/__init__.py
> b/dts/framework/remote_session/__init__.py
> index f2339b20bd..f0deeadac6 100644
> --- a/dts/framework/remote_session/__init__.py
> +++ b/dts/framework/remote_session/__init__.py
> @@ -11,4 +11,5 @@
>
>  # pylama:ignore=W0611
>
> +from .arch import Arch, create_arch
>  from .os import OSSession, create_session
> diff --git a/dts/framework/remote_session/arch/__init__.py
> b/dts/framework/remote_session/arch/__init__.py
> new file mode 100644
> index 0000000000..d78ad42ac5
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/__init__.py
> @@ -0,0 +1,20 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +from framework.config import Architecture, NodeConfiguration
> +
> +from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
> +
> +
> +def create_arch(node_config: NodeConfiguration) -> Arch:
> +    match node_config.arch:
> +        case Architecture.x86_64:
> +            return x86_64()
> +        case Architecture.x86_32:
> +            return x86_32()
> +        case Architecture.i686:
> +            return i686()
> +        case Architecture.ppc64le:
> +            return PPC64()
> +        case Architecture.arm64:
> +            return Arm64()
> diff --git a/dts/framework/remote_session/arch/arch.py
> b/dts/framework/remote_session/arch/arch.py
> new file mode 100644
> index 0000000000..05c7602def
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/arch.py
> @@ -0,0 +1,57 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +
> +class Arch(object):
> +    """
> +    Stores architecture-specific information.
> +    """
> +
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        """
> +        Return the default amount of memory allocated for hugepages DPDK
> will use.
> +        The default is an amount equal to 256 2MB hugepages (512MB
> memory).
> +        """
> +        return 256 * 2048
> +
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        """
> +        An architecture may need to force configuration of hugepages to
> first socket.
> +        """
> +        return False
> +
> +
> +class x86_64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 4096 * 2048
> +
> +
> +class x86_32(Arch):
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        return True
> +
> +
> +class i686(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 512 * 2048
> +
> +    @property
> +    def hugepage_force_first_numa(self) -> bool:
> +        return True
> +
> +
> +class PPC64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 512 * 2048
> +
> +
> +class Arm64(Arch):
> +    @property
> +    def default_hugepage_memory(self) -> int:
> +        return 2048 * 2048
> diff --git a/dts/framework/remote_session/os/linux_session.py
> b/dts/framework/remote_session/os/linux_session.py
> index 21f117b714..fad33d7613 100644
> --- a/dts/framework/remote_session/os/linux_session.py
> +++ b/dts/framework/remote_session/os/linux_session.py
> @@ -3,6 +3,8 @@
>  # Copyright(c) 2022 University of New Hampshire
>
>  from framework.config import CPU
> +from framework.exception import RemoteCommandExecutionError
> +from framework.utils import expand_range
>
>  from .posix_session import PosixSession
>
> @@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) ->
> list[CPU]:
>                  continue
>              cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
>          return cpus
> +
> +    def setup_hugepages(
> +        self, hugepage_amount: int = -1, force_first_numa: bool = False
> +    ) -> None:
> +        self.logger.info("Getting Hugepage information.")
> +        hugepage_size = self._get_hugepage_size()
> +        hugepages_total = self._get_hugepages_total()
> +        self._numa_nodes = self._get_numa_nodes()
> +
> +        target_hugepages_total = int(hugepage_amount / hugepage_size)
> +        if hugepage_amount % hugepage_size:
> +            target_hugepages_total += 1
> +        if force_first_numa or hugepages_total != target_hugepages_total:
> +            # when forcing numa, we need to clear existing hugepages
> regardless
> +            # of size, so they can be moved to the first numa node
> +            self._configure_huge_pages(
> +                target_hugepages_total, hugepage_size, force_first_numa
> +            )
> +        else:
> +            self.logger.info("Hugepages already configured.")
> +        self._mount_huge_pages()
> +
> +    def _get_hugepage_size(self) -> int:
> +        hugepage_size = self.remote_session.send_command(
> +            "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
> +        ).stdout
> +        return int(hugepage_size)
> +
> +    def _get_hugepages_total(self) -> int:
> +        hugepages_total = self.remote_session.send_command(
> +            "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> +        ).stdout
> +        return int(hugepages_total)
> +
> +    def _get_numa_nodes(self) -> list[int]:
> +        try:
> +            numa_range = self.remote_session.send_command(
> +                "cat /sys/devices/system/node/online", verify=True
> +            ).stdout
> +            numa_range = expand_range(numa_range)
> +        except RemoteCommandExecutionError:
> +            # the file doesn't exist, meaning the node doesn't support
> numa
> +            numa_range = []
> +        return numa_range
> +
> +    def _mount_huge_pages(self) -> None:
> +        self.logger.info("Re-mounting Hugepages.")
> +        hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
> +        self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
> +        result = self.remote_session.send_command(hugapge_fs_cmd)
> +        if result.stdout == "":
> +            remote_mount_path = "/mnt/huge"
> +            self.remote_session.send_command(f"mkdir -p
> {remote_mount_path}")
> +            self.remote_session.send_command(
> +                f"mount -t hugetlbfs nodev {remote_mount_path}"
> +            )
> +
> +    def _supports_numa(self) -> bool:
> +        # the system supports numa if self._numa_nodes is non-empty and
> there are more
> +        # than one numa node (in the latter case it may actually support
> numa, but
> +        # there's no reason to do any numa specific configuration)
> +        return len(self._numa_nodes) > 1
> +
> +    def _configure_huge_pages(
> +        self, amount: int, size: int, force_first_numa: bool
> +    ) -> None:
> +        self.logger.info("Configuring Hugepages.")
> +        hugepage_config_path = (
> +            f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
> +        )
> +        if force_first_numa and self._supports_numa():
> +            # clear non-numa hugepages
> +            self.remote_session.send_command(
> +                f"echo 0 | sudo tee {hugepage_config_path}"
> +            )
> +            hugepage_config_path = (
> +
> f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
> +                f"/hugepages-{size}kB/nr_hugepages"
> +            )
> +
> +        self.remote_session.send_command(
> +            f"echo {amount} | sudo tee {hugepage_config_path}"
> +        )
> diff --git a/dts/framework/remote_session/os/os_session.py
> b/dts/framework/remote_session/os/os_session.py
> index 6f6b6a979e..f84f3ce63c 100644
> --- a/dts/framework/remote_session/os/os_session.py
> +++ b/dts/framework/remote_session/os/os_session.py
> @@ -144,3 +144,13 @@ def kill_cleanup_dpdk_apps(self, dpdk_prefix_list:
> Iterable[str]) -> None:
>          Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
>          dpdk_prefix_list is empty, attempt to find running DPDK apps to
> kill and clean.
>          """
> +
> +    @abstractmethod
> +    def setup_hugepages(
> +        self, hugepage_amount: int = -1, force_first_numa: bool = False
> +    ) -> None:
> +        """
> +        Get the node's Hugepage Size, configure the specified amount of
> hugepages
> +        if needed and mount the hugepages if needed.
> +        If force_first_numa is True, configure hugepages just on the
> first socket.
> +        """
> diff --git a/dts/framework/testbed_model/node/node.py
> b/dts/framework/testbed_model/node/node.py
> index 5ee7023335..96a1724f4c 100644
> --- a/dts/framework/testbed_model/node/node.py
> +++ b/dts/framework/testbed_model/node/node.py
> @@ -16,7 +16,7 @@
>  )
>  from framework.exception import NodeCleanupError, NodeSetupError,
> convert_exception
>  from framework.logger import DTSLOG, getLogger
> -from framework.remote_session import OSSession, create_session
> +from framework.remote_session import Arch, OSSession, create_arch,
> create_session
>  from framework.testbed_model.hw import CPUAmount, cpu_filter
>
>
> @@ -33,6 +33,7 @@ class Node(object):
>      config: NodeConfiguration
>      cpus: list[CPU]
>      _other_sessions: list[OSSession]
> +    _arch: Arch
>
>      def __init__(self, node_config: NodeConfiguration):
>          self.config = node_config
> @@ -42,6 +43,7 @@ def __init__(self, node_config: NodeConfiguration):
>          self.logger = getLogger(self.name)
>          self.logger.info(f"Created node: {self.name}")
>          self.main_session = create_session(self.config, self.name,
> self.logger)
> +        self._arch = create_arch(self.config)
>          self._get_remote_cpus()
>
>      @convert_exception(NodeSetupError)
> @@ -50,6 +52,7 @@ def setup_execution(self, execution_config:
> ExecutionConfiguration) -> None:
>          Perform the execution setup that will be done for each execution
>          this node is part of.
>          """
> +        self._setup_hugepages()
>          self._setup_execution(execution_config)
>
>      def _setup_execution(self, execution_config: ExecutionConfiguration)
> -> None:
> @@ -145,6 +148,16 @@ def _get_remote_cpus(self) -> None:
>          self.logger.info("Getting CPU information.")
>          self.cpus =
> self.main_session.get_remote_cpus(self.config.bypass_core0)
>
> +    def _setup_hugepages(self):
> +        """
> +        Setup hugepages on the Node. Different architectures can supply
> different
> +        amounts of memory for hugepages and numa-based hugepage
> allocation may need
> +        to be considered.
> +        """
> +        self.main_session.setup_hugepages(
> +            self._arch.default_hugepage_memory,
> self._arch.hugepage_force_first_numa
> +        )
> +
>      def close(self) -> None:
>          """
>          Close all connections and free other resources.
> --
> 2.30.2
>
>
  
Juraj Linkeš Nov. 23, 2022, 1:58 p.m. UTC | #2
From: Owen Hilyard <ohilyard@iol.unh.edu>
Sent: Wednesday, November 16, 2022 2:48 PM
To: Juraj Linkeš <juraj.linkes@pantheon.tech>
Cc: thomas@monjalon.net; Honnappa.Nagarahalli@arm.com; lijuan.tu@intel.com; bruce.richardson@intel.com; dev@dpdk.org
Subject: Re: [RFC PATCH v2 05/10] dts: add node memory setup



On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech> wrote:
Setup hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes which use TGs that utilize hugepages.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 dts/framework/remote_session/__init__.py      |  1 +
 dts/framework/remote_session/arch/__init__.py | 20 +++++
 dts/framework/remote_session/arch/arch.py     | 57 +++++++++++++
 .../remote_session/os/linux_session.py        | 85 +++++++++++++++++++
 dts/framework/remote_session/os/os_session.py | 10 +++
 dts/framework/testbed_model/node/node.py      | 15 +++-
 6 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 dts/framework/remote_session/arch/__init__.py
 create mode 100644 dts/framework/remote_session/arch/arch.py

diff --git a/dts/framework/remote_session/__init__.py b/dts/framework/remote_session/__init__.py
index f2339b20bd..f0deeadac6 100644
--- a/dts/framework/remote_session/__init__.py
+++ b/dts/framework/remote_session/__init__.py
@@ -11,4 +11,5 @@

 # pylama:ignore=W0611

+from .arch import Arch, create_arch
 from .os import OSSession, create_session
diff --git a/dts/framework/remote_session/arch/__init__.py b/dts/framework/remote_session/arch/__init__.py
new file mode 100644
index 0000000000..d78ad42ac5
--- /dev/null
+++ b/dts/framework/remote_session/arch/__init__.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+from framework.config import Architecture, NodeConfiguration
+
+from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
+
+
+def create_arch(node_config: NodeConfiguration) -> Arch:
+    match node_config.arch:
+        case Architecture.x86_64:
+            return x86_64()
+        case Architecture.x86_32:
+            return x86_32()
+        case Architecture.i686:
+            return i686()
+        case Architecture.ppc64le:
+            return PPC64()
+        case Architecture.arm64:
+            return Arm64()
diff --git a/dts/framework/remote_session/arch/arch.py b/dts/framework/remote_session/arch/arch.py
new file mode 100644
index 0000000000..05c7602def
--- /dev/null
+++ b/dts/framework/remote_session/arch/arch.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+
+class Arch(object):
+    """
+    Stores architecture-specific information.
+    """
+
+    @property
+    def default_hugepage_memory(self) -> int:
+        """
+        Return the default amount of memory allocated for hugepages DPDK will use.
+        The default is an amount equal to 256 2MB hugepages (512MB memory).
+        """
+        return 256 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        """
+        An architecture may need to force configuration of hugepages to first socket.
+        """
+        return False
+
+
+class x86_64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 4096 * 2048
+
+
+class x86_32(Arch):
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class i686(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class PPC64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+
+class Arm64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 2048 * 2048
diff --git a/dts/framework/remote_session/os/linux_session.py b/dts/framework/remote_session/os/linux_session.py
index 21f117b714..fad33d7613 100644
--- a/dts/framework/remote_session/os/linux_session.py
+++ b/dts/framework/remote_session/os/linux_session.py
@@ -3,6 +3,8 @@
 # Copyright(c) 2022 University of New Hampshire

 from framework.config import CPU
+from framework.exception import RemoteCommandExecutionError
+from framework.utils import expand_range

 from .posix_session import PosixSession

@@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) -> list[CPU]:
                 continue
             cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
         return cpus
+
+    def setup_hugepages(
+        self, hugepage_amount: int = -1, force_first_numa: bool = False

I think that `hugepage_amount: int | None = None` is better: it expresses that the argument is optional, and the type checker will force anyone using the value to check whether it is None, which will not happen with -1.

This is actually a remnant from the original DTS, where -1 meant "use the per-arch default". I've addressed this default elsewhere in the code, so I'll remove the default for this argument (making it mandatory).
  

Patch

diff --git a/dts/framework/remote_session/__init__.py b/dts/framework/remote_session/__init__.py
index f2339b20bd..f0deeadac6 100644
--- a/dts/framework/remote_session/__init__.py
+++ b/dts/framework/remote_session/__init__.py
@@ -11,4 +11,5 @@ 
 
 # pylama:ignore=W0611
 
+from .arch import Arch, create_arch
 from .os import OSSession, create_session
diff --git a/dts/framework/remote_session/arch/__init__.py b/dts/framework/remote_session/arch/__init__.py
new file mode 100644
index 0000000000..d78ad42ac5
--- /dev/null
+++ b/dts/framework/remote_session/arch/__init__.py
@@ -0,0 +1,20 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+from framework.config import Architecture, NodeConfiguration
+
+from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
+
+
+def create_arch(node_config: NodeConfiguration) -> Arch:
+    match node_config.arch:
+        case Architecture.x86_64:
+            return x86_64()
+        case Architecture.x86_32:
+            return x86_32()
+        case Architecture.i686:
+            return i686()
+        case Architecture.ppc64le:
+            return PPC64()
+        case Architecture.arm64:
+            return Arm64()
diff --git a/dts/framework/remote_session/arch/arch.py b/dts/framework/remote_session/arch/arch.py
new file mode 100644
index 0000000000..05c7602def
--- /dev/null
+++ b/dts/framework/remote_session/arch/arch.py
@@ -0,0 +1,57 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+
+class Arch(object):
+    """
+    Stores architecture-specific information.
+    """
+
+    @property
+    def default_hugepage_memory(self) -> int:
+        """
+        Return the default amount of memory allocated for hugepages DPDK will use.
+        The default is an amount equal to 256 2MB hugepages (512MB memory).
+        """
+        return 256 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        """
+        An architecture may need to force configuration of hugepages to first socket.
+        """
+        return False
+
+
+class x86_64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 4096 * 2048
+
+
+class x86_32(Arch):
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class i686(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+    @property
+    def hugepage_force_first_numa(self) -> bool:
+        return True
+
+
+class PPC64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 512 * 2048
+
+
+class Arm64(Arch):
+    @property
+    def default_hugepage_memory(self) -> int:
+        return 2048 * 2048
diff --git a/dts/framework/remote_session/os/linux_session.py b/dts/framework/remote_session/os/linux_session.py
index 21f117b714..fad33d7613 100644
--- a/dts/framework/remote_session/os/linux_session.py
+++ b/dts/framework/remote_session/os/linux_session.py
@@ -3,6 +3,8 @@ 
 # Copyright(c) 2022 University of New Hampshire
 
 from framework.config import CPU
+from framework.exception import RemoteCommandExecutionError
+from framework.utils import expand_range
 
 from .posix_session import PosixSession
 
@@ -24,3 +26,86 @@  def get_remote_cpus(self, bypass_core0: bool) -> list[CPU]:
                 continue
             cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
         return cpus
+
+    def setup_hugepages(
+        self, hugepage_amount: int = -1, force_first_numa: bool = False
+    ) -> None:
+        self.logger.info("Getting Hugepage information.")
+        hugepage_size = self._get_hugepage_size()
+        hugepages_total = self._get_hugepages_total()
+        self._numa_nodes = self._get_numa_nodes()
+
+        target_hugepages_total = int(hugepage_amount / hugepage_size)
+        if hugepage_amount % hugepage_size:
+            target_hugepages_total += 1
+        if force_first_numa or hugepages_total != target_hugepages_total:
+            # when forcing numa, we need to clear existing hugepages regardless
+            # of size, so they can be moved to the first numa node
+            self._configure_huge_pages(
+                target_hugepages_total, hugepage_size, force_first_numa
+            )
+        else:
+            self.logger.info("Hugepages already configured.")
+        self._mount_huge_pages()
+
+    def _get_hugepage_size(self) -> int:
+        hugepage_size = self.remote_session.send_command(
+            "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
+        ).stdout
+        return int(hugepage_size)
+
+    def _get_hugepages_total(self) -> int:
+        hugepages_total = self.remote_session.send_command(
+            "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
+        ).stdout
+        return int(hugepages_total)
+
+    def _get_numa_nodes(self) -> list[int]:
+        try:
+            numa_range = self.remote_session.send_command(
+                "cat /sys/devices/system/node/online", verify=True
+            ).stdout
+            numa_range = expand_range(numa_range)
+        except RemoteCommandExecutionError:
+            # the file doesn't exist, meaning the node doesn't support numa
+            numa_range = []
+        return numa_range
+
+    def _mount_huge_pages(self) -> None:
+        self.logger.info("Re-mounting Hugepages.")
+        hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
+        self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
+        result = self.remote_session.send_command(hugapge_fs_cmd)
+        if result.stdout == "":
+            remote_mount_path = "/mnt/huge"
+            self.remote_session.send_command(f"mkdir -p {remote_mount_path}")
+            self.remote_session.send_command(
+                f"mount -t hugetlbfs nodev {remote_mount_path}"
+            )
+
+    def _supports_numa(self) -> bool:
+        # treat the system as numa-capable only if it reports more than one numa
+        # node (a single-node system may technically support numa, but it needs
+        # no numa-specific configuration)
+        return len(self._numa_nodes) > 1
+
+    def _configure_huge_pages(
+        self, amount: int, size: int, force_first_numa: bool
+    ) -> None:
+        self.logger.info("Configuring Hugepages.")
+        hugepage_config_path = (
+            f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
+        )
+        if force_first_numa and self._supports_numa():
+            # clear non-numa hugepages
+            self.remote_session.send_command(
+                f"echo 0 | sudo tee {hugepage_config_path}"
+            )
+            hugepage_config_path = (
+                f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
+                f"/hugepages-{size}kB/nr_hugepages"
+            )
+
+        self.remote_session.send_command(
+            f"echo {amount} | sudo tee {hugepage_config_path}"
+        )
diff --git a/dts/framework/remote_session/os/os_session.py b/dts/framework/remote_session/os/os_session.py
index 6f6b6a979e..f84f3ce63c 100644
--- a/dts/framework/remote_session/os/os_session.py
+++ b/dts/framework/remote_session/os/os_session.py
@@ -144,3 +144,13 @@  def kill_cleanup_dpdk_apps(self, dpdk_prefix_list: Iterable[str]) -> None:
         Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
         dpdk_prefix_list is empty, attempt to find running DPDK apps to kill and clean.
         """
+
+    @abstractmethod
+    def setup_hugepages(
+        self, hugepage_amount: int = -1, force_first_numa: bool = False
+    ) -> None:
+        """
+        Get the node's Hugepage Size, configure the specified amount of hugepages
+        if needed and mount the hugepages if needed.
+        If force_first_numa is True, configure hugepages just on the first socket.
+        """
diff --git a/dts/framework/testbed_model/node/node.py b/dts/framework/testbed_model/node/node.py
index 5ee7023335..96a1724f4c 100644
--- a/dts/framework/testbed_model/node/node.py
+++ b/dts/framework/testbed_model/node/node.py
@@ -16,7 +16,7 @@ 
 )
 from framework.exception import NodeCleanupError, NodeSetupError, convert_exception
 from framework.logger import DTSLOG, getLogger
-from framework.remote_session import OSSession, create_session
+from framework.remote_session import Arch, OSSession, create_arch, create_session
 from framework.testbed_model.hw import CPUAmount, cpu_filter
 
 
@@ -33,6 +33,7 @@  class Node(object):
     config: NodeConfiguration
     cpus: list[CPU]
     _other_sessions: list[OSSession]
+    _arch: Arch
 
     def __init__(self, node_config: NodeConfiguration):
         self.config = node_config
@@ -42,6 +43,7 @@  def __init__(self, node_config: NodeConfiguration):
         self.logger = getLogger(self.name)
         self.logger.info(f"Created node: {self.name}")
         self.main_session = create_session(self.config, self.name, self.logger)
+        self._arch = create_arch(self.config)
         self._get_remote_cpus()
 
     @convert_exception(NodeSetupError)
@@ -50,6 +52,7 @@  def setup_execution(self, execution_config: ExecutionConfiguration) -> None:
         Perform the execution setup that will be done for each execution
         this node is part of.
         """
+        self._setup_hugepages()
         self._setup_execution(execution_config)
 
     def _setup_execution(self, execution_config: ExecutionConfiguration) -> None:
@@ -145,6 +148,16 @@  def _get_remote_cpus(self) -> None:
         self.logger.info("Getting CPU information.")
         self.cpus = self.main_session.get_remote_cpus(self.config.bypass_core0)
 
+    def _setup_hugepages(self):
+        """
+        Setup hugepages on the Node. Different architectures can supply different
+        amounts of memory for hugepages and numa-based hugepage allocation may need
+        to be considered.
+        """
+        self.main_session.setup_hugepages(
+            self._arch.default_hugepage_memory, self._arch.hugepage_force_first_numa
+        )
+
     def close(self) -> None:
         """
         Close all connections and free other resources.