[v2] dts: Change hugepage runtime config to 2MB Exclusively
Checks
Commit Message
The previous implementation configures and allocates hugepages using the
system's default hugepage size. This can lead to two problems: overallocation
of hugepages (which may crash the remote host), and configuration of hugepage
sizes that are not recommended during runtime. This new implementation
allows only 2MB hugepage allocation during runtime; any other
hugepage size must be configured manually by the end user before DTS is initialized.
If the number of 2MB hugepages requested exceeds the number of 2MB
hugepages already configured on the system, then the system will remount
hugepages to cover the difference. If the number of hugepages requested is
less than or equal to the number already configured on the system,
then nothing is done.
Bugzilla ID: 1370
Signed-off-by: Nicholas Pratte <npratte@iol.unh.edu>
Reviewed-by: Jeremy Spewock <jspewock@iol.unh.edu>
---
dts/conf.yaml | 4 ++--
dts/framework/config/__init__.py | 4 ++--
dts/framework/config/conf_yaml_schema.json | 6 +++---
dts/framework/config/types.py | 2 +-
dts/framework/testbed_model/linux_session.py | 15 ++++++---------
5 files changed, 14 insertions(+), 17 deletions(-)
Comments
> From: Nicholas Pratte [mailto:npratte@iol.unh.edu]
> Sent: Tuesday, 9 April 2024 19.28
>
> The previous implementation configures and allocates hugepage sizes
> based on a system default. This can lead to two problems: overallocation of
> hugepages (which may crash the remote host), and configuration of hugepage
> sizes that are not recommended during runtime. This new implementation
> allows only 2MB hugepage allocation during runtime; any other unique
> hugepage size must be configured by the end-user for initializing DTS.
>
> If the amount of 2MB hugepages requested exceeds the amount of 2MB
> hugepages already configured on the system, then the system will remount
> hugepages to cover the difference. If the amount of hugepages requested is
> either less than or equal to the amount already configured on the system,
> then nothing is done.
>
> Bugzilla ID: 1370
> Signed-off-by: Nicholas Pratte <npratte@iol.unh.edu>
> Reviewed-by: Jeremy Spewock <jspewock@iol.unh.edu>
> ---
> dts/conf.yaml | 4 ++--
> dts/framework/config/__init__.py | 4 ++--
> dts/framework/config/conf_yaml_schema.json | 6 +++---
> dts/framework/config/types.py | 2 +-
> dts/framework/testbed_model/linux_session.py | 15 ++++++---------
> 5 files changed, 14 insertions(+), 17 deletions(-)
>
> diff --git a/dts/conf.yaml b/dts/conf.yaml
> index 8068345dd5..56c3ae6f4c 100644
> --- a/dts/conf.yaml
> +++ b/dts/conf.yaml
> @@ -35,7 +35,7 @@ nodes:
> lcores: "" # use all the available logical cores
> use_first_core: false # tells DPDK to use any physical core
> memory_channels: 4 # tells DPDK to use 4 memory channels
> - hugepages: # optional; if removed, will use system hugepage
> configuration
> + hugepages_2mb: # optional; if removed, will use system hugepage
> configuration
> amount: 256
> force_first_numa: false
> ports:
> @@ -71,7 +71,7 @@ nodes:
> os_driver: rdma
> peer_node: "SUT 1"
> peer_pci: "0000:00:08.1"
> - hugepages: # optional; if removed, will use system hugepage
> configuration
> + hugepages_2mb: # optional; if removed, will use system hugepage
> configuration
> amount: 256
> force_first_numa: false
> traffic_generator:
> diff --git a/dts/framework/config/__init__.py
> b/dts/framework/config/__init__.py
> index 4cb5c74059..b6f820e39e 100644
> --- a/dts/framework/config/__init__.py
> +++ b/dts/framework/config/__init__.py
> @@ -255,8 +255,8 @@ def from_dict(
> Either an SUT or TG configuration instance.
> """
> hugepage_config = None
> - if "hugepages" in d:
> - hugepage_config_dict = d["hugepages"]
> + if "hugepages_2mb" in d:
> + hugepage_config_dict = d["hugepages_2mb"]
> if "force_first_numa" not in hugepage_config_dict:
> hugepage_config_dict["force_first_numa"] = False
> hugepage_config = HugepageConfiguration(**hugepage_config_dict)
> diff --git a/dts/framework/config/conf_yaml_schema.json
> b/dts/framework/config/conf_yaml_schema.json
> index 4731f4511d..f4d7199523 100644
> --- a/dts/framework/config/conf_yaml_schema.json
> +++ b/dts/framework/config/conf_yaml_schema.json
> @@ -146,7 +146,7 @@
> "compiler"
> ]
> },
> - "hugepages": {
> + "hugepages_2mb": {
> "type": "object",
> "description": "Optional hugepage configuration. If not specified,
> hugepages won't be configured and DTS will use system configuration.",
> "properties": {
> @@ -253,8 +253,8 @@
> "type": "integer",
> "description": "How many memory channels to use. Optional,
> defaults to 1."
> },
> - "hugepages": {
> - "$ref": "#/definitions/hugepages"
> + "hugepages_2mb": {
> + "$ref": "#/definitions/hugepages_2mb"
> },
> "ports": {
> "type": "array",
> diff --git a/dts/framework/config/types.py b/dts/framework/config/types.py
> index 1927910d88..016e0c3dbd 100644
> --- a/dts/framework/config/types.py
> +++ b/dts/framework/config/types.py
> @@ -46,7 +46,7 @@ class NodeConfigDict(TypedDict):
> """Allowed keys and values."""
>
> #:
> - hugepages: HugepageConfigurationDict
> + hugepages_2mb: HugepageConfigurationDict
> #:
> name: str
> #:
> diff --git a/dts/framework/testbed_model/linux_session.py
> b/dts/framework/testbed_model/linux_session.py
> index 5d24030c3d..37f5eacb21 100644
> --- a/dts/framework/testbed_model/linux_session.py
> +++ b/dts/framework/testbed_model/linux_session.py
> @@ -15,7 +15,7 @@
>
> from typing_extensions import NotRequired
>
> -from framework.exception import RemoteCommandExecutionError
> +from framework.exception import ConfigurationError,
> RemoteCommandExecutionError
> from framework.utils import expand_range
>
> from .cpu import LogicalCore
> @@ -87,25 +87,22 @@ def get_dpdk_file_prefix(self, dpdk_prefix: str) -> str:
> def setup_hugepages(self, hugepage_count: int, force_first_numa: bool) ->
> None:
You should either rename this to setup_hugepages_2mb() or preferably add a hugepage_size parameter.
> """Overrides :meth:`~.os_session.OSSession.setup_hugepages`."""
> self._logger.info("Getting Hugepage information.")
> - hugepage_size = self._get_hugepage_size()
> + if "hugepages-2048kB" not in self.send_command("ls
> /sys/kernel/mm/hugepages").stdout:
> + raise ConfigurationError("2MB hugepages not supported by
> operating system")
> hugepages_total = self._get_hugepages_total()
> self._numa_nodes = self._get_numa_nodes()
>
> - if force_first_numa or hugepages_total != hugepage_count:
> + if force_first_numa or hugepages_total < hugepage_count:
> # when forcing numa, we need to clear existing hugepages
> regardless
> # of size, so they can be moved to the first numa node
> - self._configure_huge_pages(hugepage_count, hugepage_size,
> force_first_numa)
> + self._configure_huge_pages(hugepage_count, 2048,
> force_first_numa)
> else:
> self._logger.info("Hugepages already configured.")
> self._mount_huge_pages()
>
> - def _get_hugepage_size(self) -> int:
> - hugepage_size = self.send_command("awk '/Hugepagesize/ {print $2}'
> /proc/meminfo").stdout
> - return int(hugepage_size)
> -
Removing _get_hugepage_size() is OK; alternatively rename it to _get_hugepage_default_size().
> def _get_hugepages_total(self) -> int:
Also here, preferably add a size parameter, or rename it to _get_hugepages_2mb_total().
> hugepages_total = self.send_command(
> - "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> + "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
> ).stdout
> return int(hugepages_total)
>
> --
> 2.44.0
With suggested changes,
Acked-by: Morten Brørup <mb@smartsharesystems.com>
> diff --git a/dts/framework/testbed_model/linux_session.py b/dts/framework/testbed_model/linux_session.py
> index 5d24030c3d..37f5eacb21 100644
> --- a/dts/framework/testbed_model/linux_session.py
> +++ b/dts/framework/testbed_model/linux_session.py
> @@ -15,7 +15,7 @@
>
> from typing_extensions import NotRequired
>
> -from framework.exception import RemoteCommandExecutionError
> +from framework.exception import ConfigurationError, RemoteCommandExecutionError
> from framework.utils import expand_range
>
> from .cpu import LogicalCore
> @@ -87,25 +87,22 @@ def get_dpdk_file_prefix(self, dpdk_prefix: str) -> str:
> def setup_hugepages(self, hugepage_count: int, force_first_numa: bool) -> None:
> """Overrides :meth:`~.os_session.OSSession.setup_hugepages`."""
> self._logger.info("Getting Hugepage information.")
> - hugepage_size = self._get_hugepage_size()
> + if "hugepages-2048kB" not in self.send_command("ls /sys/kernel/mm/hugepages").stdout:
I have one extra point on top of Morten's suggestions (which I like).
Let's create a class variable where we store the hugepage size (2048)
and use that across the code.
> + raise ConfigurationError("2MB hugepages not supported by operating system")
> hugepages_total = self._get_hugepages_total()
> self._numa_nodes = self._get_numa_nodes()
>
> - if force_first_numa or hugepages_total != hugepage_count:
> + if force_first_numa or hugepages_total < hugepage_count:
> # when forcing numa, we need to clear existing hugepages regardless
> # of size, so they can be moved to the first numa node
> - self._configure_huge_pages(hugepage_count, hugepage_size, force_first_numa)
> + self._configure_huge_pages(hugepage_count, 2048, force_first_numa)
> else:
> self._logger.info("Hugepages already configured.")
> self._mount_huge_pages()
>
> - def _get_hugepage_size(self) -> int:
> - hugepage_size = self.send_command("awk '/Hugepagesize/ {print $2}' /proc/meminfo").stdout
> - return int(hugepage_size)
> -
> def _get_hugepages_total(self) -> int:
> hugepages_total = self.send_command(
> - "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
> + "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
> ).stdout
> return int(hugepages_total)
>
> --
> 2.44.0
>
On Wed, Apr 10, 2024 at 3:23 AM Morten Brørup <mb@smartsharesystems.com> wrote:
>
> > From: Nicholas Pratte [mailto:npratte@iol.unh.edu]
> > Sent: Tuesday, 9 April 2024 19.28
> >
> > The previous implementation configures and allocates hugepage sizes
> > based on a system default. This can lead to two problems: overallocation of
> > hugepages (which may crash the remote host), and configuration of hugepage
> > sizes that are not recommended during runtime. This new implementation
> > allows only 2MB hugepage allocation during runtime; any other unique
> > hugepage size must be configured by the end-user for initializing DTS.
> >
> > If the amount of 2MB hugepages requested exceeds the amount of 2MB
> > hugepages already configured on the system, then the system will remount
> > hugepages to cover the difference. If the amount of hugepages requested is
> > either less than or equal to the amount already configured on the system,
> > then nothing is done.
> >
> > Bugzilla ID: 1370
> > Signed-off-by: Nicholas Pratte <npratte@iol.unh.edu>
> > Reviewed-by: Jeremy Spewock <jspewock@iol.unh.edu>
> > ---
> > dts/conf.yaml | 4 ++--
> > dts/framework/config/__init__.py | 4 ++--
> > dts/framework/config/conf_yaml_schema.json | 6 +++---
> > dts/framework/config/types.py | 2 +-
> > dts/framework/testbed_model/linux_session.py | 15 ++++++---------
> > 5 files changed, 14 insertions(+), 17 deletions(-)
> >
> > diff --git a/dts/conf.yaml b/dts/conf.yaml
> > index 8068345dd5..56c3ae6f4c 100644
> > --- a/dts/conf.yaml
> > +++ b/dts/conf.yaml
> > @@ -35,7 +35,7 @@ nodes:
> > lcores: "" # use all the available logical cores
> > use_first_core: false # tells DPDK to use any physical core
> > memory_channels: 4 # tells DPDK to use 4 memory channels
> > - hugepages: # optional; if removed, will use system hugepage
> > configuration
> > + hugepages_2mb: # optional; if removed, will use system hugepage
> > configuration
> > amount: 256
> > force_first_numa: false
> > ports:
> > @@ -71,7 +71,7 @@ nodes:
> > os_driver: rdma
> > peer_node: "SUT 1"
> > peer_pci: "0000:00:08.1"
> > - hugepages: # optional; if removed, will use system hugepage
> > configuration
> > + hugepages_2mb: # optional; if removed, will use system hugepage
> > configuration
> > amount: 256
> > force_first_numa: false
> > traffic_generator:
> > diff --git a/dts/framework/config/__init__.py
> > b/dts/framework/config/__init__.py
> > index 4cb5c74059..b6f820e39e 100644
> > --- a/dts/framework/config/__init__.py
> > +++ b/dts/framework/config/__init__.py
> > @@ -255,8 +255,8 @@ def from_dict(
> > Either an SUT or TG configuration instance.
> > """
> > hugepage_config = None
> > - if "hugepages" in d:
> > - hugepage_config_dict = d["hugepages"]
> > + if "hugepages_2mb" in d:
> > + hugepage_config_dict = d["hugepages_2mb"]
> > if "force_first_numa" not in hugepage_config_dict:
> > hugepage_config_dict["force_first_numa"] = False
> > hugepage_config = HugepageConfiguration(**hugepage_config_dict)
> > diff --git a/dts/framework/config/conf_yaml_schema.json
> > b/dts/framework/config/conf_yaml_schema.json
> > index 4731f4511d..f4d7199523 100644
> > --- a/dts/framework/config/conf_yaml_schema.json
> > +++ b/dts/framework/config/conf_yaml_schema.json
> > @@ -146,7 +146,7 @@
> > "compiler"
> > ]
> > },
> > - "hugepages": {
> > + "hugepages_2mb": {
> > "type": "object",
> > "description": "Optional hugepage configuration. If not specified,
> > hugepages won't be configured and DTS will use system configuration.",
> > "properties": {
> > @@ -253,8 +253,8 @@
> > "type": "integer",
> > "description": "How many memory channels to use. Optional,
> > defaults to 1."
> > },
> > - "hugepages": {
> > - "$ref": "#/definitions/hugepages"
> > + "hugepages_2mb": {
> > + "$ref": "#/definitions/hugepages_2mb"
> > },
> > "ports": {
> > "type": "array",
> > diff --git a/dts/framework/config/types.py b/dts/framework/config/types.py
> > index 1927910d88..016e0c3dbd 100644
> > --- a/dts/framework/config/types.py
> > +++ b/dts/framework/config/types.py
> > @@ -46,7 +46,7 @@ class NodeConfigDict(TypedDict):
> > """Allowed keys and values."""
> >
> > #:
> > - hugepages: HugepageConfigurationDict
> > + hugepages_2mb: HugepageConfigurationDict
> > #:
> > name: str
> > #:
> > diff --git a/dts/framework/testbed_model/linux_session.py
> > b/dts/framework/testbed_model/linux_session.py
> > index 5d24030c3d..37f5eacb21 100644
> > --- a/dts/framework/testbed_model/linux_session.py
> > +++ b/dts/framework/testbed_model/linux_session.py
> > @@ -15,7 +15,7 @@
> >
> > from typing_extensions import NotRequired
> >
> > -from framework.exception import RemoteCommandExecutionError
> > +from framework.exception import ConfigurationError,
> > RemoteCommandExecutionError
> > from framework.utils import expand_range
> >
> > from .cpu import LogicalCore
> > @@ -87,25 +87,22 @@ def get_dpdk_file_prefix(self, dpdk_prefix: str) -> str:
> > def setup_hugepages(self, hugepage_count: int, force_first_numa: bool) ->
> > None:
>
> You should either rename this to setup_hugepages_2mb() or preferably add a hugepage_size parameter.
>
> > """Overrides :meth:`~.os_session.OSSession.setup_hugepages`."""
> > self._logger.info("Getting Hugepage information.")
> > - hugepage_size = self._get_hugepage_size()
> > + if "hugepages-2048kB" not in self.send_command("ls
> > /sys/kernel/mm/hugepages").stdout:
> > + raise ConfigurationError("2MB hugepages not supported by
> > operating system")
> > hugepages_total = self._get_hugepages_total()
> > self._numa_nodes = self._get_numa_nodes()
> >
> > - if force_first_numa or hugepages_total != hugepage_count:
> > + if force_first_numa or hugepages_total < hugepage_count:
> > # when forcing numa, we need to clear existing hugepages
> > regardless
> > # of size, so they can be moved to the first numa node
> > - self._configure_huge_pages(hugepage_count, hugepage_size,
> > force_first_numa)
> > + self._configure_huge_pages(hugepage_count, 2048,
> > force_first_numa)
> > else:
> > self._logger.info("Hugepages already configured.")
> > self._mount_huge_pages()
> >
> > - def _get_hugepage_size(self) -> int:
> > - hugepage_size = self.send_command("awk '/Hugepagesize/ {print $2}'
> > /proc/meminfo").stdout
> > - return int(hugepage_size)
> > -
>
> Removing _get_hugepage_size() is OK; alternatively rename it to _get_hugepage_default_size().
I agree with Morten's ideas and am okay with keeping the function if
Nick likes, but I want to note to Nick that _get_hugepage_size will work
differently even if we later move away from assuming 2MB in all cases (as
Morten suggested we may have to do in the far future). In that case,
we are going to make the decision based on arch, not on the current info
from /proc/meminfo. One of the important ideas behind this change is
that the user cannot configure, outside of DTS, the hugepage size that
DTS will use on the system.
@@ -35,7 +35,7 @@ nodes:
lcores: "" # use all the available logical cores
use_first_core: false # tells DPDK to use any physical core
memory_channels: 4 # tells DPDK to use 4 memory channels
- hugepages: # optional; if removed, will use system hugepage configuration
+ hugepages_2mb: # optional; if removed, will use system hugepage configuration
amount: 256
force_first_numa: false
ports:
@@ -71,7 +71,7 @@ nodes:
os_driver: rdma
peer_node: "SUT 1"
peer_pci: "0000:00:08.1"
- hugepages: # optional; if removed, will use system hugepage configuration
+ hugepages_2mb: # optional; if removed, will use system hugepage configuration
amount: 256
force_first_numa: false
traffic_generator:
@@ -255,8 +255,8 @@ def from_dict(
Either an SUT or TG configuration instance.
"""
hugepage_config = None
- if "hugepages" in d:
- hugepage_config_dict = d["hugepages"]
+ if "hugepages_2mb" in d:
+ hugepage_config_dict = d["hugepages_2mb"]
if "force_first_numa" not in hugepage_config_dict:
hugepage_config_dict["force_first_numa"] = False
hugepage_config = HugepageConfiguration(**hugepage_config_dict)
@@ -146,7 +146,7 @@
"compiler"
]
},
- "hugepages": {
+ "hugepages_2mb": {
"type": "object",
"description": "Optional hugepage configuration. If not specified, hugepages won't be configured and DTS will use system configuration.",
"properties": {
@@ -253,8 +253,8 @@
"type": "integer",
"description": "How many memory channels to use. Optional, defaults to 1."
},
- "hugepages": {
- "$ref": "#/definitions/hugepages"
+ "hugepages_2mb": {
+ "$ref": "#/definitions/hugepages_2mb"
},
"ports": {
"type": "array",
@@ -46,7 +46,7 @@ class NodeConfigDict(TypedDict):
"""Allowed keys and values."""
#:
- hugepages: HugepageConfigurationDict
+ hugepages_2mb: HugepageConfigurationDict
#:
name: str
#:
@@ -15,7 +15,7 @@
from typing_extensions import NotRequired
-from framework.exception import RemoteCommandExecutionError
+from framework.exception import ConfigurationError, RemoteCommandExecutionError
from framework.utils import expand_range
from .cpu import LogicalCore
@@ -87,25 +87,22 @@ def get_dpdk_file_prefix(self, dpdk_prefix: str) -> str:
def setup_hugepages(self, hugepage_count: int, force_first_numa: bool) -> None:
"""Overrides :meth:`~.os_session.OSSession.setup_hugepages`."""
self._logger.info("Getting Hugepage information.")
- hugepage_size = self._get_hugepage_size()
+ if "hugepages-2048kB" not in self.send_command("ls /sys/kernel/mm/hugepages").stdout:
+ raise ConfigurationError("2MB hugepages not supported by operating system")
hugepages_total = self._get_hugepages_total()
self._numa_nodes = self._get_numa_nodes()
- if force_first_numa or hugepages_total != hugepage_count:
+ if force_first_numa or hugepages_total < hugepage_count:
# when forcing numa, we need to clear existing hugepages regardless
# of size, so they can be moved to the first numa node
- self._configure_huge_pages(hugepage_count, hugepage_size, force_first_numa)
+ self._configure_huge_pages(hugepage_count, 2048, force_first_numa)
else:
self._logger.info("Hugepages already configured.")
self._mount_huge_pages()
- def _get_hugepage_size(self) -> int:
- hugepage_size = self.send_command("awk '/Hugepagesize/ {print $2}' /proc/meminfo").stdout
- return int(hugepage_size)
-
def _get_hugepages_total(self) -> int:
hugepages_total = self.send_command(
- "awk '/HugePages_Total/ { print $2 }' /proc/meminfo"
+ "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
).stdout
return int(hugepages_total)