[RFC,v2,05/10] dts: add node memory setup
Checks
Commit Message
Setup hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes which use TGs that utilize hugepages.
Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
dts/framework/remote_session/__init__.py | 1 +
dts/framework/remote_session/arch/__init__.py | 20 +++++
dts/framework/remote_session/arch/arch.py | 57 +++++++++++++
.../remote_session/os/linux_session.py | 85 +++++++++++++++++++
dts/framework/remote_session/os/os_session.py | 10 +++
dts/framework/testbed_model/node/node.py | 15 +++-
6 files changed, 187 insertions(+), 1 deletion(-)
create mode 100644 dts/framework/remote_session/arch/__init__.py
create mode 100644 dts/framework/remote_session/arch/arch.py
Comments
On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech>
wrote:
> Setup hugepages on nodes. This is useful not only on SUT nodes, but
> also on TG nodes which use TGs that utilize hugepages.
>
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
> dts/framework/remote_session/__init__.py | 1 +
> dts/framework/remote_session/arch/__init__.py | 20 +++++
> dts/framework/remote_session/arch/arch.py | 57 +++++++++++++
> .../remote_session/os/linux_session.py | 85 +++++++++++++++++++
> dts/framework/remote_session/os/os_session.py | 10 +++
> dts/framework/testbed_model/node/node.py | 15 +++-
> 6 files changed, 187 insertions(+), 1 deletion(-)
> create mode 100644 dts/framework/remote_session/arch/__init__.py
> create mode 100644 dts/framework/remote_session/arch/arch.py
>
> diff --git a/dts/framework/remote_session/__init__.py
> b/dts/framework/remote_session/__init__.py
> index f2339b20bd..f0deeadac6 100644
> --- a/dts/framework/remote_session/__init__.py
> +++ b/dts/framework/remote_session/__init__.py
> @@ -11,4 +11,5 @@
>
> # pylama:ignore=W0611
>
> +from .arch import Arch, create_arch
> from .os import OSSession, create_session
> diff --git a/dts/framework/remote_session/arch/__init__.py
> b/dts/framework/remote_session/arch/__init__.py
> new file mode 100644
> index 0000000000..d78ad42ac5
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/__init__.py
> @@ -0,0 +1,20 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +from framework.config import Architecture, NodeConfiguration
> +
> +from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
> +
> +
> +def create_arch(node_config: NodeConfiguration) -> Arch:
> + match node_config.arch:
> + case Architecture.x86_64:
> + return x86_64()
> + case Architecture.x86_32:
> + return x86_32()
> + case Architecture.i686:
> + return i686()
> + case Architecture.ppc64le:
> + return PPC64()
> + case Architecture.arm64:
> + return Arm64()
> diff --git a/dts/framework/remote_session/arch/arch.py
> b/dts/framework/remote_session/arch/arch.py
> new file mode 100644
> index 0000000000..05c7602def
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/arch.py
> @@ -0,0 +1,57 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +
> +class Arch(object):
> + """
> + Stores architecture-specific information.
> + """
> +
> + @property
> + def default_hugepage_memory(self) -> int:
> + """
> + Return the default amount of memory allocated for hugepages DPDK
> will use.
> + The default is an amount equal to 256 2MB hugepages (512MB
> memory).
> + """
> + return 256 * 2048
> +
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + """
> + An architecture may need to force configuration of hugepages to
> first socket.
> + """
> + return False
> +
> +
> +class x86_64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 4096 * 2048
> +
> +
> +class x86_32(Arch):
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + return True
> +
> +
> +class i686(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 512 * 2048
> +
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + return True
> +
> +
> +class PPC64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 512 * 2048
> +
> +
> +class Arm64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 2048 * 2048
> diff --git a/dts/framework/remote_session/os/linux_session.py
> b/dts/framework/remote_session/os/linux_session.py
> index 21f117b714..fad33d7613 100644
> --- a/dts/framework/remote_session/os/linux_session.py
> +++ b/dts/framework/remote_session/os/linux_session.py
> @@ -3,6 +3,8 @@
> # Copyright(c) 2022 University of New Hampshire
>
> from framework.config import CPU
> +from framework.exception import RemoteCommandExecutionError
> +from framework.utils import expand_range
>
> from .posix_session import PosixSession
>
> @@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) ->
> list[CPU]:
> continue
> cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
> return cpus
> +
> + def setup_hugepages(
> + self, hugepage_amount: int = -1, force_first_numa: bool = False
>
I think that `hugepage_amount: int | None = None` is better, since it
expresses that the argument is optional and the type checker will force
anyone using the value to check whether it is None, whereas that will not
happen with -1.
> + ) -> None:
> + self.logger.info("Getting Hugepage information.")
> + hugepage_size = self._get_hugepage_size()
> + hugepages_total = self._get_hugepages_total()
> + self._numa_nodes = self._get_numa_nodes()
> +
> + target_hugepages_total = int(hugepage_amount / hugepage_size)
> + if hugepage_amount % hugepage_size:
> + target_hugepages_total += 1
> + if force_first_numa or hugepages_total != target_hugepages_total:
> + # when forcing numa, we need to clear existing hugepages
> regardless
> + # of size, so they can be moved to the first numa node
> + self._configure_huge_pages(
> + target_hugepages_total, hugepage_size, force_first_numa
> + )
> + else:
> + self.logger.info("Hugepages already configured.")
> + self._mount_huge_pages()
> +
> + def _get_hugepage_size(self) -> int:
> + hugepage_size = self.remote_session.send_command(
> + "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
> + ).stdout
> + return int(hugepage_size)
> +
> + def _get_hugepages_total(self) -> int:
> + hugepages_total = self.remote_session.send_command(
> + "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> + ).stdout
> + return int(hugepages_total)
> +
> + def _get_numa_nodes(self) -> list[int]:
> + try:
> + numa_range = self.remote_session.send_command(
> + "cat /sys/devices/system/node/online", verify=True
> + ).stdout
> + numa_range = expand_range(numa_range)
> + except RemoteCommandExecutionError:
> + # the file doesn't exist, meaning the node doesn't support
> numa
> + numa_range = []
> + return numa_range
> +
> + def _mount_huge_pages(self) -> None:
> + self.logger.info("Re-mounting Hugepages.")
> + hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
> + self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
> + result = self.remote_session.send_command(hugapge_fs_cmd)
> + if result.stdout == "":
> + remote_mount_path = "/mnt/huge"
> + self.remote_session.send_command(f"mkdir -p
> {remote_mount_path}")
> + self.remote_session.send_command(
> + f"mount -t hugetlbfs nodev {remote_mount_path}"
> + )
> +
> + def _supports_numa(self) -> bool:
> + # the system supports numa if self._numa_nodes is non-empty and
> there are more
> + # than one numa node (in the latter case it may actually support
> numa, but
> + # there's no reason to do any numa specific configuration)
> + return len(self._numa_nodes) > 1
> +
> + def _configure_huge_pages(
> + self, amount: int, size: int, force_first_numa: bool
> + ) -> None:
> + self.logger.info("Configuring Hugepages.")
> + hugepage_config_path = (
> + f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
> + )
> + if force_first_numa and self._supports_numa():
> + # clear non-numa hugepages
> + self.remote_session.send_command(
> + f"echo 0 | sudo tee {hugepage_config_path}"
> + )
> + hugepage_config_path = (
> +
> f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
> + f"/hugepages-{size}kB/nr_hugepages"
> + )
> +
> + self.remote_session.send_command(
> + f"echo {amount} | sudo tee {hugepage_config_path}"
> + )
> diff --git a/dts/framework/remote_session/os/os_session.py
> b/dts/framework/remote_session/os/os_session.py
> index 6f6b6a979e..f84f3ce63c 100644
> --- a/dts/framework/remote_session/os/os_session.py
> +++ b/dts/framework/remote_session/os/os_session.py
> @@ -144,3 +144,13 @@ def kill_cleanup_dpdk_apps(self, dpdk_prefix_list:
> Iterable[str]) -> None:
> Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
> dpdk_prefix_list is empty, attempt to find running DPDK apps to
> kill and clean.
> """
> +
> + @abstractmethod
> + def setup_hugepages(
> + self, hugepage_amount: int = -1, force_first_numa: bool = False
> + ) -> None:
> + """
> + Get the node's Hugepage Size, configure the specified amount of
> hugepages
> + if needed and mount the hugepages if needed.
> + If force_first_numa is True, configure hugepages just on the
> first socket.
> + """
> diff --git a/dts/framework/testbed_model/node/node.py
> b/dts/framework/testbed_model/node/node.py
> index 5ee7023335..96a1724f4c 100644
> --- a/dts/framework/testbed_model/node/node.py
> +++ b/dts/framework/testbed_model/node/node.py
> @@ -16,7 +16,7 @@
> )
> from framework.exception import NodeCleanupError, NodeSetupError,
> convert_exception
> from framework.logger import DTSLOG, getLogger
> -from framework.remote_session import OSSession, create_session
> +from framework.remote_session import Arch, OSSession, create_arch,
> create_session
> from framework.testbed_model.hw import CPUAmount, cpu_filter
>
>
> @@ -33,6 +33,7 @@ class Node(object):
> config: NodeConfiguration
> cpus: list[CPU]
> _other_sessions: list[OSSession]
> + _arch: Arch
>
> def __init__(self, node_config: NodeConfiguration):
> self.config = node_config
> @@ -42,6 +43,7 @@ def __init__(self, node_config: NodeConfiguration):
> self.logger = getLogger(self.name)
> self.logger.info(f"Created node: {self.name}")
> self.main_session = create_session(self.config, self.name,
> self.logger)
> + self._arch = create_arch(self.config)
> self._get_remote_cpus()
>
> @convert_exception(NodeSetupError)
> @@ -50,6 +52,7 @@ def setup_execution(self, execution_config:
> ExecutionConfiguration) -> None:
> Perform the execution setup that will be done for each execution
> this node is part of.
> """
> + self._setup_hugepages()
> self._setup_execution(execution_config)
>
> def _setup_execution(self, execution_config: ExecutionConfiguration)
> -> None:
> @@ -145,6 +148,16 @@ def _get_remote_cpus(self) -> None:
> self.logger.info("Getting CPU information.")
> self.cpus =
> self.main_session.get_remote_cpus(self.config.bypass_core0)
>
> + def _setup_hugepages(self):
> + """
> + Setup hugepages on the Node. Different architectures can supply
> different
> + amounts of memory for hugepages and numa-based hugepage
> allocation may need
> + to be considered.
> + """
> + self.main_session.setup_hugepages(
> + self._arch.default_hugepage_memory,
> self._arch.hugepage_force_first_numa
> + )
> +
> def close(self) -> None:
> """
> Close all connections and free other resources.
> --
> 2.30.2
>
>
On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech>
wrote:
> Setup hugepages on nodes. This is useful not only on SUT nodes, but
> also on TG nodes which use TGs that utilize hugepages.
>
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
> dts/framework/remote_session/__init__.py | 1 +
> dts/framework/remote_session/arch/__init__.py | 20 +++++
> dts/framework/remote_session/arch/arch.py | 57 +++++++++++++
> .../remote_session/os/linux_session.py | 85 +++++++++++++++++++
> dts/framework/remote_session/os/os_session.py | 10 +++
> dts/framework/testbed_model/node/node.py | 15 +++-
> 6 files changed, 187 insertions(+), 1 deletion(-)
> create mode 100644 dts/framework/remote_session/arch/__init__.py
> create mode 100644 dts/framework/remote_session/arch/arch.py
>
> diff --git a/dts/framework/remote_session/__init__.py
> b/dts/framework/remote_session/__init__.py
> index f2339b20bd..f0deeadac6 100644
> --- a/dts/framework/remote_session/__init__.py
> +++ b/dts/framework/remote_session/__init__.py
> @@ -11,4 +11,5 @@
>
> # pylama:ignore=W0611
>
> +from .arch import Arch, create_arch
> from .os import OSSession, create_session
> diff --git a/dts/framework/remote_session/arch/__init__.py
> b/dts/framework/remote_session/arch/__init__.py
> new file mode 100644
> index 0000000000..d78ad42ac5
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/__init__.py
> @@ -0,0 +1,20 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +from framework.config import Architecture, NodeConfiguration
> +
> +from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
> +
> +
> +def create_arch(node_config: NodeConfiguration) -> Arch:
> + match node_config.arch:
> + case Architecture.x86_64:
> + return x86_64()
> + case Architecture.x86_32:
> + return x86_32()
> + case Architecture.i686:
> + return i686()
> + case Architecture.ppc64le:
> + return PPC64()
> + case Architecture.arm64:
> + return Arm64()
> diff --git a/dts/framework/remote_session/arch/arch.py
> b/dts/framework/remote_session/arch/arch.py
> new file mode 100644
> index 0000000000..05c7602def
> --- /dev/null
> +++ b/dts/framework/remote_session/arch/arch.py
> @@ -0,0 +1,57 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 PANTHEON.tech s.r.o.
> +
> +
> +class Arch(object):
> + """
> + Stores architecture-specific information.
> + """
> +
> + @property
> + def default_hugepage_memory(self) -> int:
> + """
> + Return the default amount of memory allocated for hugepages DPDK
> will use.
> + The default is an amount equal to 256 2MB hugepages (512MB
> memory).
> + """
> + return 256 * 2048
> +
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + """
> + An architecture may need to force configuration of hugepages to
> first socket.
> + """
> + return False
> +
> +
> +class x86_64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 4096 * 2048
> +
> +
> +class x86_32(Arch):
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + return True
> +
> +
> +class i686(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 512 * 2048
> +
> + @property
> + def hugepage_force_first_numa(self) -> bool:
> + return True
> +
> +
> +class PPC64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 512 * 2048
> +
> +
> +class Arm64(Arch):
> + @property
> + def default_hugepage_memory(self) -> int:
> + return 2048 * 2048
> diff --git a/dts/framework/remote_session/os/linux_session.py
> b/dts/framework/remote_session/os/linux_session.py
> index 21f117b714..fad33d7613 100644
> --- a/dts/framework/remote_session/os/linux_session.py
> +++ b/dts/framework/remote_session/os/linux_session.py
> @@ -3,6 +3,8 @@
> # Copyright(c) 2022 University of New Hampshire
>
> from framework.config import CPU
> +from framework.exception import RemoteCommandExecutionError
> +from framework.utils import expand_range
>
> from .posix_session import PosixSession
>
> @@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) ->
> list[CPU]:
> continue
> cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
> return cpus
> +
> + def setup_hugepages(
> + self, hugepage_amount: int = -1, force_first_numa: bool = False
> + ) -> None:
> + self.logger.info("Getting Hugepage information.")
> + hugepage_size = self._get_hugepage_size()
> + hugepages_total = self._get_hugepages_total()
> + self._numa_nodes = self._get_numa_nodes()
> +
> + target_hugepages_total = int(hugepage_amount / hugepage_size)
> + if hugepage_amount % hugepage_size:
> + target_hugepages_total += 1
> + if force_first_numa or hugepages_total != target_hugepages_total:
> + # when forcing numa, we need to clear existing hugepages
> regardless
> + # of size, so they can be moved to the first numa node
> + self._configure_huge_pages(
> + target_hugepages_total, hugepage_size, force_first_numa
> + )
> + else:
> + self.logger.info("Hugepages already configured.")
> + self._mount_huge_pages()
> +
> + def _get_hugepage_size(self) -> int:
> + hugepage_size = self.remote_session.send_command(
> + "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
> + ).stdout
> + return int(hugepage_size)
> +
> + def _get_hugepages_total(self) -> int:
> + hugepages_total = self.remote_session.send_command(
> + "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> + ).stdout
> + return int(hugepages_total)
> +
> + def _get_numa_nodes(self) -> list[int]:
> + try:
> + numa_range = self.remote_session.send_command(
> + "cat /sys/devices/system/node/online", verify=True
> + ).stdout
> + numa_range = expand_range(numa_range)
> + except RemoteCommandExecutionError:
> + # the file doesn't exist, meaning the node doesn't support
> numa
> + numa_range = []
> + return numa_range
> +
> + def _mount_huge_pages(self) -> None:
> + self.logger.info("Re-mounting Hugepages.")
> + hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
> + self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
> + result = self.remote_session.send_command(hugapge_fs_cmd)
> + if result.stdout == "":
> + remote_mount_path = "/mnt/huge"
> + self.remote_session.send_command(f"mkdir -p
> {remote_mount_path}")
> + self.remote_session.send_command(
> + f"mount -t hugetlbfs nodev {remote_mount_path}"
> + )
> +
> + def _supports_numa(self) -> bool:
> + # the system supports numa if self._numa_nodes is non-empty and
> there are more
> + # than one numa node (in the latter case it may actually support
> numa, but
> + # there's no reason to do any numa specific configuration)
> + return len(self._numa_nodes) > 1
> +
> + def _configure_huge_pages(
> + self, amount: int, size: int, force_first_numa: bool
> + ) -> None:
> + self.logger.info("Configuring Hugepages.")
> + hugepage_config_path = (
> + f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
> + )
> + if force_first_numa and self._supports_numa():
> + # clear non-numa hugepages
> + self.remote_session.send_command(
> + f"echo 0 | sudo tee {hugepage_config_path}"
> + )
> + hugepage_config_path = (
> +
> f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
> + f"/hugepages-{size}kB/nr_hugepages"
> + )
> +
> + self.remote_session.send_command(
> + f"echo {amount} | sudo tee {hugepage_config_path}"
> + )
> diff --git a/dts/framework/remote_session/os/os_session.py
> b/dts/framework/remote_session/os/os_session.py
> index 6f6b6a979e..f84f3ce63c 100644
> --- a/dts/framework/remote_session/os/os_session.py
> +++ b/dts/framework/remote_session/os/os_session.py
> @@ -144,3 +144,13 @@ def kill_cleanup_dpdk_apps(self, dpdk_prefix_list:
> Iterable[str]) -> None:
> Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
> dpdk_prefix_list is empty, attempt to find running DPDK apps to
> kill and clean.
> """
> +
> + @abstractmethod
> + def setup_hugepages(
> + self, hugepage_amount: int = -1, force_first_numa: bool = False
> + ) -> None:
> + """
> + Get the node's Hugepage Size, configure the specified amount of
> hugepages
> + if needed and mount the hugepages if needed.
> + If force_first_numa is True, configure hugepages just on the
> first socket.
> + """
> diff --git a/dts/framework/testbed_model/node/node.py
> b/dts/framework/testbed_model/node/node.py
> index 5ee7023335..96a1724f4c 100644
> --- a/dts/framework/testbed_model/node/node.py
> +++ b/dts/framework/testbed_model/node/node.py
> @@ -16,7 +16,7 @@
> )
> from framework.exception import NodeCleanupError, NodeSetupError,
> convert_exception
> from framework.logger import DTSLOG, getLogger
> -from framework.remote_session import OSSession, create_session
> +from framework.remote_session import Arch, OSSession, create_arch,
> create_session
> from framework.testbed_model.hw import CPUAmount, cpu_filter
>
>
> @@ -33,6 +33,7 @@ class Node(object):
> config: NodeConfiguration
> cpus: list[CPU]
> _other_sessions: list[OSSession]
> + _arch: Arch
>
> def __init__(self, node_config: NodeConfiguration):
> self.config = node_config
> @@ -42,6 +43,7 @@ def __init__(self, node_config: NodeConfiguration):
> self.logger = getLogger(self.name)
> self.logger.info(f"Created node: {self.name}")
> self.main_session = create_session(self.config, self.name,
> self.logger)
> + self._arch = create_arch(self.config)
> self._get_remote_cpus()
>
> @convert_exception(NodeSetupError)
> @@ -50,6 +52,7 @@ def setup_execution(self, execution_config:
> ExecutionConfiguration) -> None:
> Perform the execution setup that will be done for each execution
> this node is part of.
> """
> + self._setup_hugepages()
> self._setup_execution(execution_config)
>
> def _setup_execution(self, execution_config: ExecutionConfiguration)
> -> None:
> @@ -145,6 +148,16 @@ def _get_remote_cpus(self) -> None:
> self.logger.info("Getting CPU information.")
> self.cpus =
> self.main_session.get_remote_cpus(self.config.bypass_core0)
>
> + def _setup_hugepages(self):
> + """
> + Setup hugepages on the Node. Different architectures can supply
> different
> + amounts of memory for hugepages and numa-based hugepage
> allocation may need
> + to be considered.
> + """
> + self.main_session.setup_hugepages(
> + self._arch.default_hugepage_memory,
> self._arch.hugepage_force_first_numa
> + )
> +
> def close(self) -> None:
> """
> Close all connections and free other resources.
> --
> 2.30.2
>
>
From: Owen Hilyard <ohilyard@iol.unh.edu>
Sent: Wednesday, November 16, 2022 2:48 PM
To: Juraj Linkeš <juraj.linkes@pantheon.tech>
Cc: thomas@monjalon.net; Honnappa.Nagarahalli@arm.com; lijuan.tu@intel.com; bruce.richardson@intel.com; dev@dpdk.org
Subject: Re: [RFC PATCH v2 05/10] dts: add node memory setup
On Mon, Nov 14, 2022 at 11:54 AM Juraj Linkeš <juraj.linkes@pantheon.tech<mailto:juraj.linkes@pantheon.tech>> wrote:
Setup hugepages on nodes. This is useful not only on SUT nodes, but
also on TG nodes which use TGs that utilize hugepages.
Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech<mailto:juraj.linkes@pantheon.tech>>
---
dts/framework/remote_session/__init__.py | 1 +
dts/framework/remote_session/arch/__init__.py | 20 +++++
dts/framework/remote_session/arch/arch.py | 57 +++++++++++++
.../remote_session/os/linux_session.py | 85 +++++++++++++++++++
dts/framework/remote_session/os/os_session.py | 10 +++
dts/framework/testbed_model/node/node.py | 15 +++-
6 files changed, 187 insertions(+), 1 deletion(-)
create mode 100644 dts/framework/remote_session/arch/__init__.py
create mode 100644 dts/framework/remote_session/arch/arch.py
diff --git a/dts/framework/remote_session/__init__.py b/dts/framework/remote_session/__init__.py
index f2339b20bd..f0deeadac6 100644
--- a/dts/framework/remote_session/__init__.py
+++ b/dts/framework/remote_session/__init__.py
@@ -11,4 +11,5 @@
# pylama:ignore=W0611
+from .arch import Arch, create_arch
from .os import OSSession, create_session
diff --git a/dts/framework/remote_session/arch/__init__.py b/dts/framework/remote_session/arch/__init__.py
new file mode 100644
index 0000000000..d78ad42ac5
--- /dev/null
+++ b/dts/framework/remote_session/arch/__init__.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+from framework.config import Architecture, NodeConfiguration
+
+from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
+
+
+def create_arch(node_config: NodeConfiguration) -> Arch:
+ match node_config.arch:
+ case Architecture.x86_64:
+ return x86_64()
+ case Architecture.x86_32:
+ return x86_32()
+ case Architecture.i686:
+ return i686()
+ case Architecture.ppc64le:
+ return PPC64()
+ case Architecture.arm64:
+ return Arm64()
diff --git a/dts/framework/remote_session/arch/arch.py b/dts/framework/remote_session/arch/arch.py
new file mode 100644
index 0000000000..05c7602def
--- /dev/null
+++ b/dts/framework/remote_session/arch/arch.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+
+class Arch(object):
+ """
+ Stores architecture-specific information.
+ """
+
+ @property
+ def default_hugepage_memory(self) -> int:
+ """
+ Return the default amount of memory allocated for hugepages DPDK will use.
+ The default is an amount equal to 256 2MB hugepages (512MB memory).
+ """
+ return 256 * 2048
+
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ """
+ An architecture may need to force configuration of hugepages to first socket.
+ """
+ return False
+
+
+class x86_64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 4096 * 2048
+
+
+class x86_32(Arch):
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ return True
+
+
+class i686(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 512 * 2048
+
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ return True
+
+
+class PPC64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 512 * 2048
+
+
+class Arm64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 2048 * 2048
diff --git a/dts/framework/remote_session/os/linux_session.py b/dts/framework/remote_session/os/linux_session.py
index 21f117b714..fad33d7613 100644
--- a/dts/framework/remote_session/os/linux_session.py
+++ b/dts/framework/remote_session/os/linux_session.py
@@ -3,6 +3,8 @@
# Copyright(c) 2022 University of New Hampshire
from framework.config import CPU
+from framework.exception import RemoteCommandExecutionError
+from framework.utils import expand_range
from .posix_session import PosixSession
@@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) -> list[CPU]:
continue
cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
return cpus
+
+ def setup_hugepages(
+ self, hugepage_amount: int = -1, force_first_numa: bool = False
I think that `hugepage_amount: int | None = None` is better, since it expresses that the argument is optional and the type checker will force anyone using the value to check whether it is None, whereas that will not happen with -1.
This is actually a remnant from the original DTS, where -1 meant "use the per-arch default". I've addressed this default elsewhere in the code, so I'll remove the default value for this argument (making it mandatory).
@@ -11,4 +11,5 @@
# pylama:ignore=W0611
+from .arch import Arch, create_arch
from .os import OSSession, create_session
new file mode 100644
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+from framework.config import Architecture, NodeConfiguration
+
+from .arch import PPC64, Arch, Arm64, i686, x86_32, x86_64
+
+
+def create_arch(node_config: NodeConfiguration) -> Arch:
+ match node_config.arch:
+ case Architecture.x86_64:
+ return x86_64()
+ case Architecture.x86_32:
+ return x86_32()
+ case Architecture.i686:
+ return i686()
+ case Architecture.ppc64le:
+ return PPC64()
+ case Architecture.arm64:
+ return Arm64()
new file mode 100644
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 PANTHEON.tech s.r.o.
+
+
+class Arch(object):
+ """
+ Stores architecture-specific information.
+ """
+
+ @property
+ def default_hugepage_memory(self) -> int:
+ """
+ Return the default amount of memory allocated for hugepages DPDK will use.
+ The default is an amount equal to 256 2MB hugepages (512MB memory).
+ """
+ return 256 * 2048
+
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ """
+ An architecture may need to force configuration of hugepages to first socket.
+ """
+ return False
+
+
+class x86_64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 4096 * 2048
+
+
+class x86_32(Arch):
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ return True
+
+
+class i686(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 512 * 2048
+
+ @property
+ def hugepage_force_first_numa(self) -> bool:
+ return True
+
+
+class PPC64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 512 * 2048
+
+
+class Arm64(Arch):
+ @property
+ def default_hugepage_memory(self) -> int:
+ return 2048 * 2048
@@ -3,6 +3,8 @@
# Copyright(c) 2022 University of New Hampshire
from framework.config import CPU
+from framework.exception import RemoteCommandExecutionError
+from framework.utils import expand_range
from .posix_session import PosixSession
@@ -24,3 +26,86 @@ def get_remote_cpus(self, bypass_core0: bool) -> list[CPU]:
continue
cpus.append(CPU(int(cpu), int(core), int(socket), int(node)))
return cpus
+
+ def setup_hugepages(
+ self, hugepage_amount: int = -1, force_first_numa: bool = False
+ ) -> None:
+ self.logger.info("Getting Hugepage information.")
+ hugepage_size = self._get_hugepage_size()
+ hugepages_total = self._get_hugepages_total()
+ self._numa_nodes = self._get_numa_nodes()
+
+ target_hugepages_total = int(hugepage_amount / hugepage_size)
+ if hugepage_amount % hugepage_size:
+ target_hugepages_total += 1
+ if force_first_numa or hugepages_total != target_hugepages_total:
+ # when forcing numa, we need to clear existing hugepages regardless
+ # of size, so they can be moved to the first numa node
+ self._configure_huge_pages(
+ target_hugepages_total, hugepage_size, force_first_numa
+ )
+ else:
+ self.logger.info("Hugepages already configured.")
+ self._mount_huge_pages()
+
+ def _get_hugepage_size(self) -> int:
+ hugepage_size = self.remote_session.send_command(
+ "awk '/Hugepagesize/ {print $2}' /proc/meminfo"
+ ).stdout
+ return int(hugepage_size)
+
+ def _get_hugepages_total(self) -> int:
+ hugepages_total = self.remote_session.send_command(
+ "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
+ ).stdout
+ return int(hugepages_total)
+
+ def _get_numa_nodes(self) -> list[int]:
+ try:
+ numa_range = self.remote_session.send_command(
+ "cat /sys/devices/system/node/online", verify=True
+ ).stdout
+ numa_range = expand_range(numa_range)
+ except RemoteCommandExecutionError:
+ # the file doesn't exist, meaning the node doesn't support numa
+ numa_range = []
+ return numa_range
+
+ def _mount_huge_pages(self) -> None:
+ self.logger.info("Re-mounting Hugepages.")
+ hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
+ self.remote_session.send_command(f"umount $({hugapge_fs_cmd})")
+ result = self.remote_session.send_command(hugapge_fs_cmd)
+ if result.stdout == "":
+ remote_mount_path = "/mnt/huge"
+ self.remote_session.send_command(f"mkdir -p {remote_mount_path}")
+ self.remote_session.send_command(
+ f"mount -t hugetlbfs nodev {remote_mount_path}"
+ )
+
+ def _supports_numa(self) -> bool:
+ # the system supports numa if self._numa_nodes is non-empty and there are more
+ # than one numa node (in the latter case it may actually support numa, but
+ # there's no reason to do any numa specific configuration)
+ return len(self._numa_nodes) > 1
+
+ def _configure_huge_pages(
+ self, amount: int, size: int, force_first_numa: bool
+ ) -> None:
+ self.logger.info("Configuring Hugepages.")
+ hugepage_config_path = (
+ f"/sys/kernel/mm/hugepages/hugepages-{size}kB/nr_hugepages"
+ )
+ if force_first_numa and self._supports_numa():
+ # clear non-numa hugepages
+ self.remote_session.send_command(
+ f"echo 0 | sudo tee {hugepage_config_path}"
+ )
+ hugepage_config_path = (
+ f"/sys/devices/system/node/node{self._numa_nodes[0]}/hugepages"
+ f"/hugepages-{size}kB/nr_hugepages"
+ )
+
+ self.remote_session.send_command(
+ f"echo {amount} | sudo tee {hugepage_config_path}"
+ )
@@ -144,3 +144,13 @@ def kill_cleanup_dpdk_apps(self, dpdk_prefix_list: Iterable[str]) -> None:
Kill and cleanup all DPDK apps identified by dpdk_prefix_list. If
dpdk_prefix_list is empty, attempt to find running DPDK apps to kill and clean.
"""
+
+ @abstractmethod
+ def setup_hugepages(
+ self, hugepage_amount: int = -1, force_first_numa: bool = False
+ ) -> None:
+ """
+ Get the node's Hugepage Size, configure the specified amount of hugepages
+ if needed and mount the hugepages if needed.
+ If force_first_numa is True, configure hugepages just on the first socket.
+ """
@@ -16,7 +16,7 @@
)
from framework.exception import NodeCleanupError, NodeSetupError, convert_exception
from framework.logger import DTSLOG, getLogger
-from framework.remote_session import OSSession, create_session
+from framework.remote_session import Arch, OSSession, create_arch, create_session
from framework.testbed_model.hw import CPUAmount, cpu_filter
@@ -33,6 +33,7 @@ class Node(object):
config: NodeConfiguration
cpus: list[CPU]
_other_sessions: list[OSSession]
+ _arch: Arch
def __init__(self, node_config: NodeConfiguration):
self.config = node_config
@@ -42,6 +43,7 @@ def __init__(self, node_config: NodeConfiguration):
self.logger = getLogger(self.name)
self.logger.info(f"Created node: {self.name}")
self.main_session = create_session(self.config, self.name, self.logger)
+ self._arch = create_arch(self.config)
self._get_remote_cpus()
@convert_exception(NodeSetupError)
@@ -50,6 +52,7 @@ def setup_execution(self, execution_config: ExecutionConfiguration) -> None:
Perform the execution setup that will be done for each execution
this node is part of.
"""
+ self._setup_hugepages()
self._setup_execution(execution_config)
def _setup_execution(self, execution_config: ExecutionConfiguration) -> None:
@@ -145,6 +148,16 @@ def _get_remote_cpus(self) -> None:
self.logger.info("Getting CPU information.")
self.cpus = self.main_session.get_remote_cpus(self.config.bypass_core0)
+ def _setup_hugepages(self):
+ """
+ Setup hugepages on the Node. Different architectures can supply different
+ amounts of memory for hugepages and numa-based hugepage allocation may need
+ to be considered.
+ """
+ self.main_session.setup_hugepages(
+ self._arch.default_hugepage_memory, self._arch.hugepage_force_first_numa
+ )
+
def close(self) -> None:
"""
Close all connections and free other resources.