[RFC,v3,3/6] build: automatic NUMA and cpu counts detection

Message ID 1603280261-20206-4-git-send-email-juraj.linkes@pantheon.tech (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: Arm build options rework

Checks

Context        Check     Description
ci/checkpatch  success   coding style OK

Commit Message

Juraj Linkeš Oct. 21, 2020, 11:37 a.m. UTC
  The number of cpus and numa nodes varies from one build machine to the
next, resulting in mismatched RTE_MAX_LCORE and RTE_MAX_NUMA_NODES
counts across builds. Automatically discover the host's numa and cpu
counts to remove this mismatch for native builds. Use the current
defaults for default builds.
Force users to specify the counts for cross builds, either in cross files
or on the command line.
Give users the option to override the discovery or the cross-file values
by specifying them on the command line with -Dmax_lcores and
-Dmax_numa_nodes.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 buildtools/get_cpu_count.py  |  7 ++++++
 buildtools/get_numa_count.py | 22 +++++++++++++++++++
 buildtools/meson.build       |  2 ++
 config/meson.build           | 42 ++++++++++++++++++++++++++++++++++--
 meson_options.txt            |  8 +++----
 5 files changed, 75 insertions(+), 6 deletions(-)
 create mode 100644 buildtools/get_cpu_count.py
 create mode 100644 buildtools/get_numa_count.py
  

Comments

Bruce Richardson Oct. 21, 2020, 12:02 p.m. UTC | #1
On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> The build machine's number of cpus and numa nodes vary, resulting in
> mismatched counts of RTE_MAX_LCORE and RTE_MAX_NUMA_NODES for many
> builds. Automatically discover the host's numa and cpu counts to remove
> this mismatch for native builds. Use current defaults for default builds.
> Force the users to specify the counts for cross build in cross files or
> on the command line.
> Give users the option to override the discovery or values from cross
> files by specifying them on the command line with -Dmax_lcores and
> -Dmax_numa_nodes.
> 
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
>  buildtools/get_cpu_count.py  |  7 ++++++
>  buildtools/get_numa_count.py | 22 +++++++++++++++++++
>  buildtools/meson.build       |  2 ++
>  config/meson.build           | 42 ++++++++++++++++++++++++++++++++++--
>  meson_options.txt            |  8 +++----
>  5 files changed, 75 insertions(+), 6 deletions(-)
>  create mode 100644 buildtools/get_cpu_count.py
>  create mode 100644 buildtools/get_numa_count.py
> 
> diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> new file mode 100644
> index 000000000..386f85f8b
> --- /dev/null
> +++ b/buildtools/get_cpu_count.py
> @@ -0,0 +1,7 @@
> +#!/usr/bin/python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright (c) 2020 PANTHEON.tech s.r.o.
> +
> +import os
> +
> +print(os.cpu_count())
> diff --git a/buildtools/get_numa_count.py b/buildtools/get_numa_count.py
> new file mode 100644
> index 000000000..f0c49973a
> --- /dev/null
> +++ b/buildtools/get_numa_count.py
> @@ -0,0 +1,22 @@
> +#!/usr/bin/python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright (c) 2020 PANTHEON.tech s.r.o.
> +
> +import ctypes
> +import glob
> +import os
> +import subprocess
> +
> +if os.name == 'posix':
> +    if os.path.isdir('/sys/devices/system/node'):
> +        print(len(glob.glob('/sys/devices/system/node/node*')))
> +    else:
> +        print(subprocess.run(['sysctl', 'vm.ndomains'], capture_output=True).stdout)
> +
> +elif os.name == 'nt':
> +    libkernel32 = ctypes.windll.kernel32
> +
> +    count = ctypes.c_ulong()
> +
> +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> +    print(count.value + 1)
> diff --git a/buildtools/meson.build b/buildtools/meson.build
> index 04808dabc..925e733b1 100644
> --- a/buildtools/meson.build
> +++ b/buildtools/meson.build
> @@ -17,3 +17,5 @@ else
>  endif
>  map_to_win_cmd = py3 + files('map_to_win.py')
>  sphinx_wrapper = py3 + files('call-sphinx-build.py')
> +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> +get_numa_count_cmd = py3 + files('get_numa_count.py')
> diff --git a/config/meson.build b/config/meson.build
> index a57c8ae9e..c4477f977 100644
> --- a/config/meson.build
> +++ b/config/meson.build
> @@ -74,7 +74,11 @@ endif
>  # still being able to support the CPU features required for DPDK.
>  # This can be bumped up by the DPDK project, but it can never be an
>  # invariant like 'native'
> +max_lcores = get_option('max_lcores')
> +max_numa_nodes = get_option('max_numa_nodes')
>  if machine == 'default'
> +	max_numa_nodes = 4
> +	max_lcores = 128

This doesn't seem right, since you are overriding the user-specified values
with hard-coded ones.
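
For illustration, a minimal sketch (an editorial assumption, not something proposed verbatim in this thread) of how the generic defaults could be applied only when the options were left at their 0 "unset" value, so that explicit -Dmax_lcores/-Dmax_numa_nodes settings would survive a 'default' machine build:

    # hypothetical: fall back to the generic defaults only if the user
    # did not set the options explicitly (0 meaning "unset")
    if machine == 'default'
        if max_lcores == 0
            max_lcores = 128
        endif
        if max_numa_nodes == 0
            max_numa_nodes = 4
        endif
    endif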

>  	if host_machine.cpu_family().startswith('x86')
>  		# matches the old pre-meson build systems default
>  		machine = 'corei7'
> @@ -83,6 +87,22 @@ if machine == 'default'
>  	elif host_machine.cpu_family().startswith('ppc')
>  		machine = 'power8'
>  	endif
> +elif not meson.is_cross_build()
> +	# find host core count and numa node count for native builds
> +	if max_lcores == 0
> +		max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
> +		min_lcores = 2
> +		if max_lcores < min_lcores
> +			message('Found less than @0@ cores, building for @0@ cores'.format(min_lcores))
> +			max_lcores = min_lcores
> +		else
> +			message('Found @0@ cores'.format(max_lcores))
> +		endif
> +	endif
> +	if max_numa_nodes == 0
> +		max_numa_nodes = run_command(get_numa_count_cmd).stdout().to_int()
> +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> +	endif
>  endif
>  
>  dpdk_conf.set('RTE_MACHINE', machine)
> @@ -227,8 +247,10 @@ foreach arg: warning_flags
>  endforeach
>  
>  # set other values pulled from the build options
> -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
> +if not meson.is_cross_build()
> +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> +endif

Rather than conditionally setting the value here, you should move the
checks below up above this to simplify things.

>  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
>  dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
>  dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
> @@ -247,6 +269,22 @@ compile_time_cpuflags = []
>  subdir(arch_subdir)
>  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
>  
> +# check that cpu and numa count is set in cross builds
> +if meson.is_cross_build()
> +    	if max_lcores > 0
> +		# specified on the cmdline
> +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> +		error('Number of cores for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> +	endif
> +	if max_numa_nodes > 0
> +		# specified on the cmdline
> +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> +		error('Number of numa nodes for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> +	endif
> +endif
> +
>  # set the install path for the drivers
>  dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
>  
> diff --git a/meson_options.txt b/meson_options.txt
> index 9bf18ab6b..01b0c45c3 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
>  	description: 'set the target machine type')
>  option('max_ethports', type: 'integer', value: 32,
>  	description: 'maximum number of Ethernet devices')
> -option('max_lcores', type: 'integer', value: 128,
> -	description: 'maximum number of cores/threads supported by EAL')
> -option('max_numa_nodes', type: 'integer', value: 4,
> -	description: 'maximum number of NUMA nodes supported by EAL')
> +option('max_lcores', type: 'integer', value: 0,
> +	description: 'maximum number of cores/threads supported by EAL. Value 0 means the number of cpus on the host will be used. For cross build, set to non-zero to overwrite the cross-file value.')
> +option('max_numa_nodes', type: 'integer', value: 0,
> +	description: 'maximum number of NUMA nodes supported by EAL. Value 0 means the number of numa nodes on the host will be used. For cross build, set to non-zero to overwrite the cross-file value.')

I don't like this change, because it very much assumes for
non-cross-compiles that people will be running DPDK on the system they
build it on. That's a very, very big assumption! I'm ok with having zero as
a "detect" option, and having the values overridden from cross-files, but
not with detection as the default out-of-the-box option! Lots of users may
pull builds from a CI based on VMs with just a few cores, for instance.

>  option('enable_trace_fp', type: 'boolean', value: false,
>  	description: 'enable fast path trace points.')
>  option('tests', type: 'boolean', value: true,
> -- 
> 2.20.1
>
  
Juraj Linkeš Oct. 21, 2020, 1:01 p.m. UTC | #2
> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Wednesday, October 21, 2020 2:02 PM
> To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> Subject: Re: [RFC PATCH v3 3/6] build: automatic NUMA and cpu counts
> detection
> 
> On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > The build machine's number of cpus and numa nodes vary, resulting in
> > mismatched counts of RTE_MAX_LCORE and RTE_MAX_NUMA_NODES for
> many
> > builds. Automatically discover the host's numa and cpu counts to
> > remove this mismatch for native builds. Use current defaults for default builds.
> > Force the users to specify the counts for cross build in cross files
> > or on the command line.
> > Give users the option to override the discovery or values from cross
> > files by specifying them on the command line with -Dmax_lcores and
> > -Dmax_numa_nodes.
> >
> > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > ---
> >  buildtools/get_cpu_count.py  |  7 ++++++
> > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> >  buildtools/meson.build       |  2 ++
> >  config/meson.build           | 42 ++++++++++++++++++++++++++++++++++--
> >  meson_options.txt            |  8 +++----
> >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode 100644
> > buildtools/get_cpu_count.py  create mode 100644
> > buildtools/get_numa_count.py
> >
> > diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> > new file mode 100644 index 000000000..386f85f8b
> > --- /dev/null
> > +++ b/buildtools/get_cpu_count.py
> > @@ -0,0 +1,7 @@
> > +#!/usr/bin/python3
> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > +PANTHEON.tech s.r.o.
> > +
> > +import os
> > +
> > +print(os.cpu_count())
> > diff --git a/buildtools/get_numa_count.py
> > b/buildtools/get_numa_count.py new file mode 100644 index
> > 000000000..f0c49973a
> > --- /dev/null
> > +++ b/buildtools/get_numa_count.py
> > @@ -0,0 +1,22 @@
> > +#!/usr/bin/python3
> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > +PANTHEON.tech s.r.o.
> > +
> > +import ctypes
> > +import glob
> > +import os
> > +import subprocess
> > +
> > +if os.name == 'posix':
> > +    if os.path.isdir('/sys/devices/system/node'):
> > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > +    else:
> > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > +capture_output=True).stdout)
> > +
> > +elif os.name == 'nt':
> > +    libkernel32 = ctypes.windll.kernel32
> > +
> > +    count = ctypes.c_ulong()
> > +
> > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > +    print(count.value + 1)
> > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > 04808dabc..925e733b1 100644
> > --- a/buildtools/meson.build
> > +++ b/buildtools/meson.build
> > @@ -17,3 +17,5 @@ else
> >  endif
> >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper = py3 +
> > files('call-sphinx-build.py')
> > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > diff --git a/config/meson.build b/config/meson.build index
> > a57c8ae9e..c4477f977 100644
> > --- a/config/meson.build
> > +++ b/config/meson.build
> > @@ -74,7 +74,11 @@ endif
> >  # still being able to support the CPU features required for DPDK.
> >  # This can be bumped up by the DPDK project, but it can never be an
> > # invariant like 'native'
> > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > +get_option('max_numa_nodes')
> >  if machine == 'default'
> > +	max_numa_nodes = 4
> > +	max_lcores = 128
> 
> This doesn't seem right, since you are overriding the user-specified values with
> hard-coded ones.
> 

I understand we're using the default/generic build to build portable dpdk distro packages, meaning the settings for those packages should always be the same, no? If not, what should the default/generic build be? And when would someone do a default/generic build with their own values? It wouldn't be a default/generic build anymore, right?

> >  	if host_machine.cpu_family().startswith('x86')
> >  		# matches the old pre-meson build systems default
> >  		machine = 'corei7'
> > @@ -83,6 +87,22 @@ if machine == 'default'
> >  	elif host_machine.cpu_family().startswith('ppc')
> >  		machine = 'power8'
> >  	endif
> > +elif not meson.is_cross_build()
> > +	# find host core count and numa node count for native builds
> > +	if max_lcores == 0
> > +		max_lcores =
> run_command(get_cpu_count_cmd).stdout().to_int()
> > +		min_lcores = 2
> > +		if max_lcores < min_lcores
> > +			message('Found less than @0@ cores, building for
> @0@ cores'.format(min_lcores))
> > +			max_lcores = min_lcores
> > +		else
> > +			message('Found @0@ cores'.format(max_lcores))
> > +		endif
> > +	endif
> > +	if max_numa_nodes == 0
> > +		max_numa_nodes =
> run_command(get_numa_count_cmd).stdout().to_int()
> > +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> > +	endif
> >  endif
> >
> >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@ foreach
> > arg: warning_flags  endforeach
> >
> >  # set other values pulled from the build options
> > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
> > +if not meson.is_cross_build()
> > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes) endif
> 
> Rather than conditionally setting the value here, you should move the checks
> below up above this to simplify things.
> 

Do you mean the cross build checks? Those have to be after subdir(arch_subdir) so that we can override the values from cross files (as the commit message says). 
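
For illustration, a minimal sketch (assumed property names, not taken from this series) of how an arch subdir such as config/arm/meson.build could pull the counts from a cross file, which the later check in config/meson.build can then override when -Dmax_lcores/-Dmax_numa_nodes are passed:

    # hypothetical arch-subdir snippet; 'max_lcores'/'max_numa_nodes' here
    # are assumed [properties] entries in the cross file
    if meson.is_cross_build()
        dpdk_conf.set('RTE_MAX_LCORE',
            meson.get_cross_property('max_lcores', 16))
        dpdk_conf.set('RTE_MAX_NUMA_NODES',
            meson.get_cross_property('max_numa_nodes', 1))
    endif

The corresponding values would live in the [properties] section of the cross file, which is where meson.get_cross_property() reads from.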

> >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) @@
> > -247,6 +269,22 @@ compile_time_cpuflags = []
> >  subdir(arch_subdir)
> >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > ','.join(compile_time_cpuflags))
> >
> > +# check that cpu and numa count is set in cross builds if
> > +meson.is_cross_build()
> > +    	if max_lcores > 0
> > +		# specified on the cmdline
> > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > +		error('Number of cores for cross build not specified in @0@
> subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > +	endif
> > +	if max_numa_nodes > 0
> > +		# specified on the cmdline
> > +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > +		error('Number of numa nodes for cross build not specified in
> @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > +	endif
> > +endif
> > +
> >  # set the install path for the drivers
> > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> >
> > diff --git a/meson_options.txt b/meson_options.txt index
> > 9bf18ab6b..01b0c45c3 100644
> > --- a/meson_options.txt
> > +++ b/meson_options.txt
> > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> >  	description: 'set the target machine type')  option('max_ethports',
> > type: 'integer', value: 32,
> >  	description: 'maximum number of Ethernet devices')
> > -option('max_lcores', type: 'integer', value: 128,
> > -	description: 'maximum number of cores/threads supported by EAL')
> > -option('max_numa_nodes', type: 'integer', value: 4,
> > -	description: 'maximum number of NUMA nodes supported by EAL')
> > +option('max_lcores', type: 'integer', value: 0,
> > +	description: 'maximum number of cores/threads supported by EAL.
> > +Value 0 means the number of cpus on the host will be used. For cross build,
> set to non-zero to overwrite the cross-file value.') option('max_numa_nodes',
> type: 'integer', value: 0,
> > +	description: 'maximum number of NUMA nodes supported by EAL. Value
> 0
> > +means the number of numa nodes on the host will be used. For cross
> > +build, set to non-zero to overwrite the cross-file value.')
> 
> I don't like this change, because it very much assumes for non-cross-compiles
> that people will be running DPDK on the system they build it on. That's a very,
> very big assumption!

I'll be using definitions from https://mesonbuild.com/Cross-compilation.html.
I understand cross compilation to be building for a different host machine than the build machine (which is aligned with pretty much every definition I found). I understand this to be true not only for builds between architectures, but also within an architecture (e.g. an x86_64 build machine building for an x86_64 host machine).
So yes, when someone does a native build, it stands to reason they want to use it on the build machine. If they wanted to use it elsewhere, they would cross compile.
Another thing is that the current build philosophy is to detect as much as possible (not having statically defined configuration, as you mentioned in the past). Detecting the number of cores and numa nodes fits this perfectly.
And yet another thing is that the assumption already seems to be present in the build system - it already detects a lot of things, some of which may not be satisfied on machines other than the build machine. I may be wrong about this.

> I'm ok with having zero as a "detect" option, and having
> the values overridden from cross-files, but not with detection as the default out-
> of-the-box option! Lots of users may pull builds from a CI based on VMs with
> just a few cores, for instance.

If not having the automatic detection is a concern because of users using CI builds, then we (if it's from our CI) can change what we're building in CI - the default/generic build seems like a good fit because it's supposed to work on a variety of systems. Expecting that a native build from random VMs would work anywhere doesn't seem very realistic - it's been built for that VM environment (because it's a native build).

Here's my understanding on which the current version is based:
1. Since we want to get away from having statically defined configuration, numa and core count discovery is exactly what we should have in the build system. Since discovery is currently the default for lib/drivers, it stands to reason it should be the default for everything else, if possible.
2. Native build should produce binaries matching the build machine as well as possible.
3. Default/generic build should produce binaries executable on a range of systems (ideally all systems of a given architecture).
4. Other builds, that is non-native builds, are cross-compilation, since we're building for a host machine other than the build machine.

What I haven't taken into account is users using CI builds. That could be remedied by modifying the CI a bit while staying consistent with what native/default/generic/cross builds are (or should be). And in any case, if we're not interested in testing the exact CI environment (which we aren't, since we don't want to use 2 cores with 1 numa node), we really shouldn't be doing native builds there.

I'm interested in hearing where my thinking deviates from yours.

> 
> >  option('enable_trace_fp', type: 'boolean', value: false,
> >  	description: 'enable fast path trace points.')  option('tests',
> > type: 'boolean', value: true,
> > --
> > 2.20.1
> >
  
Bruce Richardson Oct. 21, 2020, 2:13 p.m. UTC | #3
On Wed, Oct 21, 2020 at 01:01:41PM +0000, Juraj Linkeš wrote:
> 
> 
> > -----Original Message-----
> > From: Bruce Richardson <bruce.richardson@intel.com>
> > Sent: Wednesday, October 21, 2020 2:02 PM
> > To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> > Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> > jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> > Subject: Re: [RFC PATCH v3 3/6] build: automatic NUMA and cpu counts
> > detection
> > 
> > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > The build machine's number of cpus and numa nodes vary, resulting in
> > > mismatched counts of RTE_MAX_LCORE and RTE_MAX_NUMA_NODES for
> > many
> > > builds. Automatically discover the host's numa and cpu counts to
> > > remove this mismatch for native builds. Use current defaults for default builds.
> > > Force the users to specify the counts for cross build in cross files
> > > or on the command line.
> > > Give users the option to override the discovery or values from cross
> > > files by specifying them on the command line with -Dmax_lcores and
> > > -Dmax_numa_nodes.
> > >
> > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > ---
> > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > >  buildtools/meson.build       |  2 ++
> > >  config/meson.build           | 42 ++++++++++++++++++++++++++++++++++--
> > >  meson_options.txt            |  8 +++----
> > >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode 100644
> > > buildtools/get_cpu_count.py  create mode 100644
> > > buildtools/get_numa_count.py
> > >
> > > diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> > > new file mode 100644 index 000000000..386f85f8b
> > > --- /dev/null
> > > +++ b/buildtools/get_cpu_count.py
> > > @@ -0,0 +1,7 @@
> > > +#!/usr/bin/python3
> > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > +PANTHEON.tech s.r.o.
> > > +
> > > +import os
> > > +
> > > +print(os.cpu_count())
> > > diff --git a/buildtools/get_numa_count.py
> > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > 000000000..f0c49973a
> > > --- /dev/null
> > > +++ b/buildtools/get_numa_count.py
> > > @@ -0,0 +1,22 @@
> > > +#!/usr/bin/python3
> > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > +PANTHEON.tech s.r.o.
> > > +
> > > +import ctypes
> > > +import glob
> > > +import os
> > > +import subprocess
> > > +
> > > +if os.name == 'posix':
> > > +    if os.path.isdir('/sys/devices/system/node'):
> > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > +    else:
> > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > +capture_output=True).stdout)
> > > +
> > > +elif os.name == 'nt':
> > > +    libkernel32 = ctypes.windll.kernel32
> > > +
> > > +    count = ctypes.c_ulong()
> > > +
> > > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > +    print(count.value + 1)
> > > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > > 04808dabc..925e733b1 100644
> > > --- a/buildtools/meson.build
> > > +++ b/buildtools/meson.build
> > > @@ -17,3 +17,5 @@ else
> > >  endif
> > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper = py3 +
> > > files('call-sphinx-build.py')
> > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > diff --git a/config/meson.build b/config/meson.build index
> > > a57c8ae9e..c4477f977 100644
> > > --- a/config/meson.build
> > > +++ b/config/meson.build
> > > @@ -74,7 +74,11 @@ endif
> > >  # still being able to support the CPU features required for DPDK.
> > >  # This can be bumped up by the DPDK project, but it can never be an
> > > # invariant like 'native'
> > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > +get_option('max_numa_nodes')
> > >  if machine == 'default'
> > > +	max_numa_nodes = 4
> > > +	max_lcores = 128
> > 
> > This doesn't seem right, since you are overriding the user-specified values with
> > hard-coded ones.
> > 
> 
> I understand we're using the default build/generic to build portalbe dpdk distro packages, meaning the settings for those packages should always be the same, no? If not, what should the default/generic build be? And when would someone do a default/generic build with their values? It wouldn't be a default/generic anymore, right?
> 
> > >  	if host_machine.cpu_family().startswith('x86')
> > >  		# matches the old pre-meson build systems default
> > >  		machine = 'corei7'
> > > @@ -83,6 +87,22 @@ if machine == 'default'
> > >  	elif host_machine.cpu_family().startswith('ppc')
> > >  		machine = 'power8'
> > >  	endif
> > > +elif not meson.is_cross_build()
> > > +	# find host core count and numa node count for native builds
> > > +	if max_lcores == 0
> > > +		max_lcores =
> > run_command(get_cpu_count_cmd).stdout().to_int()
> > > +		min_lcores = 2
> > > +		if max_lcores < min_lcores
> > > +			message('Found less than @0@ cores, building for
> > @0@ cores'.format(min_lcores))
> > > +			max_lcores = min_lcores
> > > +		else
> > > +			message('Found @0@ cores'.format(max_lcores))
> > > +		endif
> > > +	endif
> > > +	if max_numa_nodes == 0
> > > +		max_numa_nodes =
> > run_command(get_numa_count_cmd).stdout().to_int()
> > > +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> > > +	endif
> > >  endif
> > >
> > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@ foreach
> > > arg: warning_flags  endforeach
> > >
> > >  # set other values pulled from the build options
> > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
> > > +if not meson.is_cross_build()
> > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes) endif
> > 
> > Rather than conditionally setting the value here, you should move the checks
> > below up above this to simplify things.
> > 
> 
> Do you mean the cross build checks? Those have to be after subdir(arch_subdir) so that we can override the values from cross files (as the commit message says). 
> 
> > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) @@
> > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > >  subdir(arch_subdir)
> > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > ','.join(compile_time_cpuflags))
> > >
> > > +# check that cpu and numa count is set in cross builds if
> > > +meson.is_cross_build()
> > > +    	if max_lcores > 0
> > > +		# specified on the cmdline
> > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > +		error('Number of cores for cross build not specified in @0@
> > subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > > +	endif
> > > +	if max_numa_nodes > 0
> > > +		# specified on the cmdline
> > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > +		error('Number of numa nodes for cross build not specified in
> > @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > > +	endif
> > > +endif
> > > +
> > >  # set the install path for the drivers
> > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > >
> > > diff --git a/meson_options.txt b/meson_options.txt index
> > > 9bf18ab6b..01b0c45c3 100644
> > > --- a/meson_options.txt
> > > +++ b/meson_options.txt
> > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> > >  	description: 'set the target machine type')  option('max_ethports',
> > > type: 'integer', value: 32,
> > >  	description: 'maximum number of Ethernet devices')
> > > -option('max_lcores', type: 'integer', value: 128,
> > > -	description: 'maximum number of cores/threads supported by EAL')
> > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > -	description: 'maximum number of NUMA nodes supported by EAL')
> > > +option('max_lcores', type: 'integer', value: 0,
> > > +	description: 'maximum number of cores/threads supported by EAL.
> > > +Value 0 means the number of cpus on the host will be used. For cross build,
> > set to non-zero to overwrite the cross-file value.') option('max_numa_nodes',
> > type: 'integer', value: 0,
> > > +	description: 'maximum number of NUMA nodes supported by EAL. Value
> > 0
> > > +means the number of numa nodes on the host will be used. For cross
> > > +build, set to non-zero to overwrite the cross-file value.')
> > 
> > I don't like this change, because it very much assumes for non-cross-compiles
> > that people will be running DPDK on the system they build it on. That's a very,
> > very big assumption!
> 
> I'll be using definitions from https://mesonbuild.com/Cross-compilation.html.
> I understand cross compilation to be building for a diffent host machine than the build machine (which is aligned with pretty much every definition I found). I understand this to be true not only for builds between architectures, but also within an architecture (e.g. x86_64 build machine building for x86_64 host machine).
> So yes, when someone does a native build, it stands to reason they want to use it on the build machine. If they wanted to use it elsewhere, they would cross compile.
> Another thing is the current build philosophy is to detect as much as possible (not having statically defined configuration, as you mentioned in the past). Detecting the number of cores and numa nodes fits this perfectly.
> And yet another thing is that the assumption seems to be already present in the build system - it already detects a lot things, some of which may not be satisfied on machines other than the build machine. I may be wrong about this.
> 
> > I'm ok with having zero as a "detect" option, and having
> > the values overridden from cross-files, but not with detection as the default out-
> > of-the-box option! Lots of users may pull builds from a CI based on VMs with
> > just a few cores, for instance.
> 
> If not having the automatic detection is a concern because of users using CI builds, then we (if it's from our CI) can change what we're building in CI - the default/generic build seems like a good fit because it's supposed to work on a variety of systems. Expecting that native build from random VMs would work anywhere doesn't seen very realistic - it's been build for that VM environment (because it's a native build).
> 
> Here's my understanding on which the current version is based:
> 1. Since we want to get away from having statically defined configuration, numa and core count discovery is exactly what we should have in the build system. Since discorery is currently the default for lib/drivers, it stands to reason it should be default for everything else, if possible.
> 2. Native build should produce binaries matching the build machine as well as possible.
> 3. Default/generic build should produce binaries executable on a range of systems (ideally all systems of a given architecture).
> 4. Other builds, that is non-native builds, are cross-compilation, since we're building for host machine other that the build machine.
> 
> What I haven't taken into account is users using CI builds. That could be remedied by modifying the CI a bit while being consistent with what native/default/generic/cross builds are (or should be). And in any case, if we're not interested in testing the exact CI environment (which we aren't, since we don't want to use 2 cores with 1 numa), we really shouldn't be doing native builds there.
> 
> I'm interested in hearing where my thinking deviates from yours.
> 

There are a number of points on which we differ, I think.

Firstly, the use of "native" and "default/generic" for the "machine"
parameter refers only to the instruction-set level from the compiler, and
should not affect any other settings, since all settings are independent.
Therefore, setting "machine" to "native" does not mean that we should
detect cores and numa nodes, and similarly setting it to "default" does not
mean that we should ignore the settings for these values and pick our own
chosen default values.

Secondly, the use of cross-compilation only applies when you are compiling
for a different architecture or environment (e.g. OS) to what you are
building on. Building on a 4-core x86 machine to run on a dual-socket,
32-core x86 machine is not cross-compiling, but still needs to work by
default. Something like building a 32-bit binary on a 64-bit OS is in most
cases not done by cross-compilation either, but is rather outside the scope
of the discussion, except as a reference point to show the scope of
differences which can be accommodated as "native builds".

In terms of dynamic configuration for things like cores and numa nodes, the
ideal end state here is not to have them detected at build time on the host
system, but instead to have them detected at runtime and sized dynamically.
In the absence of that, these values should be set to reasonable defaults
so that when a user compiles up a binary without setting these explicitly
it should run on 95%+ of systems of that type.

This is my understanding of the issues, anyway. :-)

Regards,
/Bruce
  
Bruce Richardson Oct. 21, 2020, 2:27 p.m. UTC | #4
On Wed, Oct 21, 2020 at 03:13:19PM +0100, Bruce Richardson wrote:
> On Wed, Oct 21, 2020 at 01:01:41PM +0000, Juraj Linkeš wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Bruce Richardson <bruce.richardson@intel.com>
> > > Sent: Wednesday, October 21, 2020 2:02 PM
> > > To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> > > Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> > > jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> > > Subject: Re: [RFC PATCH v3 3/6] build: automatic NUMA and cpu counts
> > > detection
> > > 
> > > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > > The build machine's number of cpus and numa nodes vary, resulting in
> > > > mismatched counts of RTE_MAX_LCORE and RTE_MAX_NUMA_NODES for
> > > many
> > > > builds. Automatically discover the host's numa and cpu counts to
> > > > remove this mismatch for native builds. Use current defaults for default builds.
> > > > Force the users to specify the counts for cross build in cross files
> > > > or on the command line.
> > > > Give users the option to override the discovery or values from cross
> > > > files by specifying them on the command line with -Dmax_lcores and
> > > > -Dmax_numa_nodes.
> > > >
> > > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > ---
> > > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > > >  buildtools/meson.build       |  2 ++
> > > >  config/meson.build           | 42 ++++++++++++++++++++++++++++++++++--
> > > >  meson_options.txt            |  8 +++----
> > > >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode 100644
> > > > buildtools/get_cpu_count.py  create mode 100644
> > > > buildtools/get_numa_count.py
> > > >
> > > > diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> > > > new file mode 100644 index 000000000..386f85f8b
> > > > --- /dev/null
> > > > +++ b/buildtools/get_cpu_count.py
> > > > @@ -0,0 +1,7 @@
> > > > +#!/usr/bin/python3
> > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > +PANTHEON.tech s.r.o.
> > > > +
> > > > +import os
> > > > +
> > > > +print(os.cpu_count())
> > > > diff --git a/buildtools/get_numa_count.py
> > > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > > 000000000..f0c49973a
> > > > --- /dev/null
> > > > +++ b/buildtools/get_numa_count.py
> > > > @@ -0,0 +1,22 @@
> > > > +#!/usr/bin/python3
> > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > +PANTHEON.tech s.r.o.
> > > > +
> > > > +import ctypes
> > > > +import glob
> > > > +import os
> > > > +import subprocess
> > > > +
> > > > +if os.name == 'posix':
> > > > +    if os.path.isdir('/sys/devices/system/node'):
> > > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > > +    else:
> > > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > > +capture_output=True).stdout)
> > > > +
> > > > +elif os.name == 'nt':
> > > > +    libkernel32 = ctypes.windll.kernel32
> > > > +
> > > > +    count = ctypes.c_ulong()
> > > > +
> > > > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > > +    print(count.value + 1)
> > > > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > > > 04808dabc..925e733b1 100644
> > > > --- a/buildtools/meson.build
> > > > +++ b/buildtools/meson.build
> > > > @@ -17,3 +17,5 @@ else
> > > >  endif
> > > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper = py3 +
> > > > files('call-sphinx-build.py')
> > > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > > diff --git a/config/meson.build b/config/meson.build index
> > > > a57c8ae9e..c4477f977 100644
> > > > --- a/config/meson.build
> > > > +++ b/config/meson.build
> > > > @@ -74,7 +74,11 @@ endif
> > > >  # still being able to support the CPU features required for DPDK.
> > > >  # This can be bumped up by the DPDK project, but it can never be an
> > > > # invariant like 'native'
> > > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > > +get_option('max_numa_nodes')
> > > >  if machine == 'default'
> > > > +	max_numa_nodes = 4
> > > > +	max_lcores = 128
> > > 
> > > This doesn't seem right, since you are overriding the user-specified values with
> > > hard-coded ones.
> > > 
> > 
> > I understand we're using the default build/generic to build portalbe dpdk distro packages, meaning the settings for those packages should always be the same, no? If not, what should the default/generic build be? And when would someone do a default/generic build with their values? It wouldn't be a default/generic anymore, right?
> > 
> > > >  	if host_machine.cpu_family().startswith('x86')
> > > >  		# matches the old pre-meson build systems default
> > > >  		machine = 'corei7'
> > > > @@ -83,6 +87,22 @@ if machine == 'default'
> > > >  	elif host_machine.cpu_family().startswith('ppc')
> > > >  		machine = 'power8'
> > > >  	endif
> > > > +elif not meson.is_cross_build()
> > > > +	# find host core count and numa node count for native builds
> > > > +	if max_lcores == 0
> > > > +		max_lcores =
> > > run_command(get_cpu_count_cmd).stdout().to_int()
> > > > +		min_lcores = 2
> > > > +		if max_lcores < min_lcores
> > > > +			message('Found less than @0@ cores, building for
> > > @0@ cores'.format(min_lcores))
> > > > +			max_lcores = min_lcores
> > > > +		else
> > > > +			message('Found @0@ cores'.format(max_lcores))
> > > > +		endif
> > > > +	endif
> > > > +	if max_numa_nodes == 0
> > > > +		max_numa_nodes =
> > > run_command(get_numa_count_cmd).stdout().to_int()
> > > > +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> > > > +	endif
> > > >  endif
> > > >
> > > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@ foreach
> > > > arg: warning_flags  endforeach
> > > >
> > > >  # set other values pulled from the build options
> > > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > > -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
> > > > +if not meson.is_cross_build()
> > > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes) endif
> > > 
> > > Rather than conditionally setting the value here, you should move the checks
> > > below up above this to simplify things.
> > > 
> > 
> > Do you mean the cross build checks? Those have to be after subdir(arch_subdir) so that we can override the values from cross files (as the commit message says). 
> > 
> > > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > > dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) @@
> > > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > > >  subdir(arch_subdir)
> > > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > > ','.join(compile_time_cpuflags))
> > > >
> > > > +# check that cpu and numa count is set in cross builds if
> > > > +meson.is_cross_build()
> > > > +    	if max_lcores > 0
> > > > +		# specified on the cmdline
> > > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > > +		error('Number of cores for cross build not specified in @0@
> > > subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > > > +	endif
> > > > +	if max_numa_nodes > 0
> > > > +		# specified on the cmdline
> > > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> > > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > > +		error('Number of numa nodes for cross build not specified in
> > > @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > > > +	endif
> > > > +endif
> > > > +
> > > >  # set the install path for the drivers
> > > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > > >
> > > > diff --git a/meson_options.txt b/meson_options.txt index
> > > > 9bf18ab6b..01b0c45c3 100644
> > > > --- a/meson_options.txt
> > > > +++ b/meson_options.txt
> > > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> > > >  	description: 'set the target machine type')  option('max_ethports',
> > > > type: 'integer', value: 32,
> > > >  	description: 'maximum number of Ethernet devices')
> > > > -option('max_lcores', type: 'integer', value: 128,
> > > > -	description: 'maximum number of cores/threads supported by EAL')
> > > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > > -	description: 'maximum number of NUMA nodes supported by EAL')
> > > > +option('max_lcores', type: 'integer', value: 0,
> > > > +	description: 'maximum number of cores/threads supported by EAL.
> > > > +Value 0 means the number of cpus on the host will be used. For cross build,
> > > set to non-zero to overwrite the cross-file value.') option('max_numa_nodes',
> > > type: 'integer', value: 0,
> > > > +	description: 'maximum number of NUMA nodes supported by EAL. Value
> > > 0
> > > > +means the number of numa nodes on the host will be used. For cross
> > > > +build, set to non-zero to overwrite the cross-file value.')
> > > 
> > > I don't like this change, because it very much assumes for non-cross-compiles
> > > that people will be running DPDK on the system they build it on. That's a very,
> > > very big assumption!
> > 
> > I'll be using definitions from https://mesonbuild.com/Cross-compilation.html.
> > I understand cross compilation to be building for a diffent host machine than the build machine (which is aligned with pretty much every definition I found). I understand this to be true not only for builds between architectures, but also within an architecture (e.g. x86_64 build machine building for x86_64 host machine).
> > So yes, when someone does a native build, it stands to reason they want to use it on the build machine. If they wanted to use it elsewhere, they would cross compile.
> > Another thing is the current build philosophy is to detect as much as possible (not having statically defined configuration, as you mentioned in the past). Detecting the number of cores and numa nodes fits this perfectly.
> > And yet another thing is that the assumption seems to be already present in the build system - it already detects a lot things, some of which may not be satisfied on machines other than the build machine. I may be wrong about this.
> > 
> > > I'm ok with having zero as a "detect" option, and having
> > > the values overridden from cross-files, but not with detection as the default out-
> > > of-the-box option! Lots of users may pull builds from a CI based on VMs with
> > > just a few cores, for instance.
> > 
> > If not having the automatic detection is a concern because of users using CI builds, then we (if it's from our CI) can change what we're building in CI - the default/generic build seems like a good fit because it's supposed to work on a variety of systems. Expecting that native build from random VMs would work anywhere doesn't seen very realistic - it's been build for that VM environment (because it's a native build).
> > 
> > Here's my understanding on which the current version is based:
> > 1. Since we want to get away from having statically defined configuration, numa and core count discovery is exactly what we should have in the build system. Since discorery is currently the default for lib/drivers, it stands to reason it should be default for everything else, if possible.
> > 2. Native build should produce binaries matching the build machine as well as possible.
> > 3. Default/generic build should produce binaries executable on a range of systems (ideally all systems of a given architecture).
> > 4. Other builds, that is non-native builds, are cross-compilation, since we're building for host machine other that the build machine.
> > 
> > What I haven't taken into account is users using CI builds. That could be remedied by modifying the CI a bit while being consistent with what native/default/generic/cross builds are (or should be). And in any case, if we're not interested in testing the exact CI environment (which we aren't, since we don't want to use 2 cores with 1 numa), we really shouldn't be doing native builds there.
> > 
> > I'm interested in hearing where my thinking deviates from yours.
> > 
> 
> There are a number of points in which we differ, I think.
> 
> Firstly, the use of "native" and "default/generic" for the "machine"
> parameter refers only to the instruction-set level from the compiler, and
> should not affect any other settings, since all settings are independent.
> Therefore, setting "machine" to "native" does not mean that we should
> detect cores and numa nodes, and similarly setting it to "default" does not
> mean that we should ignore the settings for these values and pick our own
> chosen default values.
> 
> Secondly, the use of cross-compilation only applies when you are compiling
> for a different architecture or environment (e.g. OS) to what you are
> building on. Building on a 4-core x86 machine to run on a dual-socket,
> 32-core x86 machine is not cross-compiling, but still needs to work by
> default. Something like building a 32-bit binary on a 64-bit OS is in most
> cases not done by cross-compilation either, but is rather outside the scope
> of the discussion, except as a reference point to show the scope of
> differences which can be accomodated as "native builds".
> 
> In terms of dynamic configuration for things like cores and numa nodes, the
> ideal end state here is not to have them detected at build time on the host
> system, but instead to have them detected at runtime and sized dynamically.
> In the absense of that, these values should be set to reasonable defaults
> so that when a user compiles up a binary without settings these explicitly
> it should run on 95%+ of systems of that type.
> 
> This is my understanding of the issues, anyway. :-)
> 
What could possibly work is to set the defaults for these to "0" as done in
your patch, but thereafter have the resulting defaults set
per-architecture, rather than globally. That would allow x86 to tune things
more for native-style builds, while in all cases allowing the user to override.

/Bruce
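
For illustration, a rough sketch of that per-architecture fallback (placement and numbers are assumptions, not a concrete proposal from this thread): keep 0 as the "unset" value in meson_options.txt and let each arch subdir fill in its own default, e.g.:

    # hypothetical, e.g. in config/x86/meson.build
    if max_lcores == 0
        max_lcores = 128
    endif
    if max_numa_nodes == 0
        max_numa_nodes = 4
    endif

Each architecture could then choose numbers suited to the systems it typically targets, while explicit -Dmax_lcores/-Dmax_numa_nodes values would still take precedence.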
  
Juraj Linkeš Oct. 23, 2020, 10:07 a.m. UTC | #5
> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Wednesday, October 21, 2020 4:28 PM
> To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC PATCH v3 3/6] build: automatic NUMA and cpu
> counts detection
> 
> On Wed, Oct 21, 2020 at 03:13:19PM +0100, Bruce Richardson wrote:
> > On Wed, Oct 21, 2020 at 01:01:41PM +0000, Juraj Linkeš wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Bruce Richardson <bruce.richardson@intel.com>
> > > > Sent: Wednesday, October 21, 2020 2:02 PM
> > > > To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> > > > Phil.Yang@arm.com; vcchunga@amazon.com;
> Dharmik.Thakkar@arm.com;
> > > > jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> > > > Subject: Re: [RFC PATCH v3 3/6] build: automatic NUMA and cpu
> > > > counts detection
> > > >
> > > > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > > > The build machine's number of cpus and numa nodes vary,
> > > > > resulting in mismatched counts of RTE_MAX_LCORE and
> > > > > RTE_MAX_NUMA_NODES for
> > > > many
> > > > > builds. Automatically discover the host's numa and cpu counts to
> > > > > remove this mismatch for native builds. Use current defaults for default
> builds.
> > > > > Force the users to specify the counts for cross build in cross
> > > > > files or on the command line.
> > > > > Give users the option to override the discovery or values from
> > > > > cross files by specifying them on the command line with
> > > > > -Dmax_lcores and -Dmax_numa_nodes.
> > > > >
> > > > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > > ---
> > > > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > > > >  buildtools/meson.build       |  2 ++
> > > > >  config/meson.build           | 42 ++++++++++++++++++++++++++++++++++-
> -
> > > > >  meson_options.txt            |  8 +++----
> > > > >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode
> > > > > 100644 buildtools/get_cpu_count.py  create mode 100644
> > > > > buildtools/get_numa_count.py
> > > > >
> > > > > diff --git a/buildtools/get_cpu_count.py
> > > > > b/buildtools/get_cpu_count.py new file mode 100644 index
> > > > > 000000000..386f85f8b
> > > > > --- /dev/null
> > > > > +++ b/buildtools/get_cpu_count.py
> > > > > @@ -0,0 +1,7 @@
> > > > > +#!/usr/bin/python3
> > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > +PANTHEON.tech s.r.o.
> > > > > +
> > > > > +import os
> > > > > +
> > > > > +print(os.cpu_count())
> > > > > diff --git a/buildtools/get_numa_count.py
> > > > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > > > 000000000..f0c49973a
> > > > > --- /dev/null
> > > > > +++ b/buildtools/get_numa_count.py
> > > > > @@ -0,0 +1,22 @@
> > > > > +#!/usr/bin/python3
> > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > +PANTHEON.tech s.r.o.
> > > > > +
> > > > > +import ctypes
> > > > > +import glob
> > > > > +import os
> > > > > +import subprocess
> > > > > +
> > > > > +if os.name == 'posix':
> > > > > +    if os.path.isdir('/sys/devices/system/node'):
> > > > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > > > +    else:
> > > > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > > > +capture_output=True).stdout)
> > > > > +
> > > > > +elif os.name == 'nt':
> > > > > +    libkernel32 = ctypes.windll.kernel32
> > > > > +
> > > > > +    count = ctypes.c_ulong()
> > > > > +
> > > > > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > > > +    print(count.value + 1)
> > > > > diff --git a/buildtools/meson.build b/buildtools/meson.build
> > > > > index
> > > > > 04808dabc..925e733b1 100644
> > > > > --- a/buildtools/meson.build
> > > > > +++ b/buildtools/meson.build
> > > > > @@ -17,3 +17,5 @@ else
> > > > >  endif
> > > > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper =
> > > > > py3 +
> > > > > files('call-sphinx-build.py')
> > > > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > > > diff --git a/config/meson.build b/config/meson.build index
> > > > > a57c8ae9e..c4477f977 100644
> > > > > --- a/config/meson.build
> > > > > +++ b/config/meson.build
> > > > > @@ -74,7 +74,11 @@ endif
> > > > >  # still being able to support the CPU features required for DPDK.
> > > > >  # This can be bumped up by the DPDK project, but it can never
> > > > > be an # invariant like 'native'
> > > > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > > > +get_option('max_numa_nodes')
> > > > >  if machine == 'default'
> > > > > +	max_numa_nodes = 4
> > > > > +	max_lcores = 128
> > > >
> > > > This doesn't seem right, since you are overriding the
> > > > user-specified values with hard-coded ones.
> > > >
> > >
> > > I understand we're using the default build/generic to build portalbe dpdk
> distro packages, meaning the settings for those packages should always be the
> same, no? If not, what should the default/generic build be? And when would
> someone do a default/generic build with their values? It wouldn't be a
> default/generic anymore, right?
> > >
> > > > >  	if host_machine.cpu_family().startswith('x86')
> > > > >  		# matches the old pre-meson build systems default
> > > > >  		machine = 'corei7'
> > > > > @@ -83,6 +87,22 @@ if machine == 'default'
> > > > >  	elif host_machine.cpu_family().startswith('ppc')
> > > > >  		machine = 'power8'
> > > > >  	endif
> > > > > +elif not meson.is_cross_build()
> > > > > +	# find host core count and numa node count for native builds
> > > > > +	if max_lcores == 0
> > > > > +		max_lcores =
> > > > run_command(get_cpu_count_cmd).stdout().to_int()
> > > > > +		min_lcores = 2
> > > > > +		if max_lcores < min_lcores
> > > > > +			message('Found less than @0@ cores, building
> for
> > > > @0@ cores'.format(min_lcores))
> > > > > +			max_lcores = min_lcores
> > > > > +		else
> > > > > +			message('Found @0@
> cores'.format(max_lcores))
> > > > > +		endif
> > > > > +	endif
> > > > > +	if max_numa_nodes == 0
> > > > > +		max_numa_nodes =
> > > > run_command(get_numa_count_cmd).stdout().to_int()
> > > > > +		message('Found @0@ numa
> nodes'.format(max_numa_nodes))
> > > > > +	endif
> > > > >  endif
> > > > >
> > > > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@
> > > > > foreach
> > > > > arg: warning_flags  endforeach
> > > > >
> > > > >  # set other values pulled from the build options
> > > > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > > > -dpdk_conf.set('RTE_MAX_NUMA_NODES',
> > > > > get_option('max_numa_nodes'))
> > > > > +if not meson.is_cross_build()
> > > > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> endif
> > > >
> > > > Rather than conditionally setting the value here, you should move
> > > > the checks below up above this to simplify things.
> > > >
> > >
> > > Do you mean the cross build checks? Those have to be after
> subdir(arch_subdir) so that we can override the values from cross files (as the
> commit message says).
> > >
> > > > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > > > dpdk_conf.set('RTE_ENABLE_TRACE_FP',
> > > > > get_option('enable_trace_fp')) @@
> > > > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > > > >  subdir(arch_subdir)
> > > > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > > > ','.join(compile_time_cpuflags))
> > > > >
> > > > > +# check that cpu and numa count is set in cross builds if
> > > > > +meson.is_cross_build()
> > > > > +    	if max_lcores > 0
> > > > > +		# specified on the cmdline
> > > > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > > > +		error('Number of cores for cross build not specified in
> @0@
> > > > subdir (e.g. in a cross-file) nor on the
> > > > cmdline'.format(arch_subdir))
> > > > > +	endif
> > > > > +	if max_numa_nodes > 0
> > > > > +		# specified on the cmdline
> > > > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES',
> max_numa_nodes)
> > > > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > > > +		error('Number of numa nodes for cross build not
> specified in
> > > > @0@ subdir (e.g. in a cross-file) nor on the
> > > > cmdline'.format(arch_subdir))
> > > > > +	endif
> > > > > +endif
> > > > > +
> > > > >  # set the install path for the drivers
> > > > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > > > >
> > > > > diff --git a/meson_options.txt b/meson_options.txt index
> > > > > 9bf18ab6b..01b0c45c3 100644
> > > > > --- a/meson_options.txt
> > > > > +++ b/meson_options.txt
> > > > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> > > > >  	description: 'set the target machine type')
> > > > > option('max_ethports',
> > > > > type: 'integer', value: 32,
> > > > >  	description: 'maximum number of Ethernet devices')
> > > > > -option('max_lcores', type: 'integer', value: 128,
> > > > > -	description: 'maximum number of cores/threads supported by
> EAL')
> > > > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > > > -	description: 'maximum number of NUMA nodes supported by
> EAL')
> > > > > +option('max_lcores', type: 'integer', value: 0,
> > > > > +	description: 'maximum number of cores/threads supported by
> EAL.
> > > > > +Value 0 means the number of cpus on the host will be used. For
> > > > > +cross build,
> > > > set to non-zero to overwrite the cross-file value.')
> > > > option('max_numa_nodes',
> > > > type: 'integer', value: 0,
> > > > > +	description: 'maximum number of NUMA nodes supported by
> EAL.
> > > > > +Value
> > > > 0
> > > > > +means the number of numa nodes on the host will be used. For
> > > > > +cross build, set to non-zero to overwrite the cross-file
> > > > > +value.')
> > > >
> > > > I don't like this change, because it very much assumes for
> > > > non-cross-compiles that people will be running DPDK on the system
> > > > they build it on. That's a very, very big assumption!
> > >
> > > I'll be using definitions from https://mesonbuild.com/Cross-
> compilation.html.
> > > I understand cross compilation to be building for a different host machine than
> the build machine (which is aligned with pretty much every definition I found). I
> understand this to be true not only for builds between architectures, but also
> within an architecture (e.g. x86_64 build machine building for x86_64 host
> machine).
> > > So yes, when someone does a native build, it stands to reason they want to
> use it on the build machine. If they wanted to use it elsewhere, they would cross
> compile.
> > > Another thing is the current build philosophy is to detect as much as possible
> (not having statically defined configuration, as you mentioned in the past).
> Detecting the number of cores and numa nodes fits this perfectly.
> > > And yet another thing is that the assumption seems to be already present in
> the build system - it already detects a lot of things, some of which may not be
> satisfied on machines other than the build machine. I may be wrong about this.
> > >
> > > > I'm ok with having zero as a "detect" option, and having the
> > > > values overridden from cross-files, but not with detection as the
> > > > default out- of-the-box option! Lots of users may pull builds from
> > > > a CI based on VMs with just a few cores, for instance.
> > >
> > > If not having the automatic detection is a concern because of users using CI
> builds, then we (if it's from our CI) can change what we're building in CI - the
> default/generic build seems like a good fit because it's supposed to work on a
> variety of systems. Expecting that native build from random VMs would work
> anywhere doesn't seem very realistic - it's been built for that VM environment
> (because it's a native build).
> > >
> > > Here's my understanding on which the current version is based:
> > > 1. Since we want to get away from having statically defined configuration,
> numa and core count discovery is exactly what we should have in the build
> system. Since discovery is currently the default for lib/drivers, it stands to reason
> it should be default for everything else, if possible.
> > > 2. Native build should produce binaries matching the build machine as well as
> possible.
> > > 3. Default/generic build should produce binaries executable on a range of
> systems (ideally all systems of a given architecture).
> > > 4. Other builds, that is non-native builds, are cross-compilation, since we're
> building for a host machine other than the build machine.
> > >
> > > What I haven't taken into account is users using CI builds. That could be
> remedied by modifying the CI a bit while being consistent with what
> native/default/generic/cross builds are (or should be). And in any case, if we're
> not interested in testing the exact CI environment (which we aren't, since we
> don't want to use 2 cores with 1 numa), we really shouldn't be doing native
> builds there.
> > >
> > > I'm interested in hearing where my thinking deviates from yours.
> > >
> >
> > There are a number of points in which we differ, I think.
> >
> > Firstly, the use of "native" and "default/generic" for the "machine"
> > parameter refers only to the instruction-set level from the compiler,
> > and should not affect any other settings, since all settings are independent.

It seems that what you're saying may be true from the x86 point of view, but it is not true from the arm point of view, and thus not true in general.
It's not true that all settings are independent for the arm build. When building with machine=native we discover what machine we're building on, and some settings (other than the numa and core counts) are set based on that. For machine=generic we also have a set of values that produces the generic binaries you describe below. More on this in that section.

> > Therefore, setting "machine" to "native" does not mean that we should
> > detect cores and numa nodes, and similarly setting it to "default"
> > does not mean that we should ignore the settings for these values and
> > pick our own chosen default values.
> >

I don't think this follows (if all settings are independent, then setting machine to native or anything else implies nothing for the other settings). But I think it does follow from what you write next.

However, since machine="default/generic" does not specify a type of build, but rather just one configuration option (or a small subset of options for arm builds), removing the ability to override the default values when machine="default/generic" is not warranted.

> > Secondly, the use of cross-compilation only applies when you are
> > compiling for a different architecture or environment (e.g. OS) to
> > what you are building on. Building on a 4-core x86 machine to run on a
> > dual-socket, 32-core x86 machine is not cross-compiling, but still
> > needs to work by default.

We (the arm community, or at least a part of it) frequently refer to aarch64 -> aarch64 builds as cross-compilation when we're building for a specific SoC (usually small/slow) on a fast server-grade aarch64 machine. This is probably because we're conflating what most people consider cross-compilation to be (building for a different arch or OS) with what meson cross compilation is (using a cross-file). Arm SoCs vary greatly and we want to capture that variation in cross files and use the cross files even on the same architecture (or rather, we want users to be able to). From now on, when talking about cross compilation I'll mean meson cross compilation (i.e. using a cross file).
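
To make the cross-file mechanism concrete, here's a rough sketch of how the counts could be read from a cross file's [properties] section - the property names and the use of get_cross_property() are just an illustration of the idea, not the exact mechanism of this patch:

    # assuming the cross file carries, in its [properties] section:
    #     max_lcores = 16
    #     max_numa_nodes = 1
    if meson.is_cross_build()
        dpdk_conf.set('RTE_MAX_LCORE',
            meson.get_cross_property('max_lcores', 128))
        dpdk_conf.set('RTE_MAX_NUMA_NODES',
            meson.get_cross_property('max_numa_nodes', 4))
    endif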

> > Something like building a 32-bit binary on a
> > 64-bit OS is in most cases not done by cross-compilation either, but
> > is rather outside the scope of the discussion, except as a reference
> > point to show the scope of differences which can be accommodated as "native
> builds".
> >
> > In terms of dynamic configuration for things like cores and numa
> > nodes, the ideal end state here is not to have them detected at build
> > time on the host system, but instead to have them detected at runtime and
> sized dynamically.

This would simplify everything, from the build itself to users not having to worry about one more thing - would this be hard or time-consuming to implement?

> > In the absence of that, these values should be set to reasonable
> > defaults so that when a user compiles up a binary without settings
> > these explicitly it should run on 95%+ of systems of that type.
> >

This, coupled with the paragraph about cross-compilation, implies that we shouldn't default to discovering the values, and I think that's reasonable. Having the ability to specify these on the command line, an optional discovery mechanism, and the ability to specify them in a cross file should cover our (arm) needs.

> > This is my understanding of the issues, anyway. :-)
> >
> What could possibly work is to set the defaults for these to "0" as done in your
> patch, but thereafter have the resulting defaults set per-architecture, rather
> than globally. That would allow x86 to tune things more for native-style builds,
> while in all cases allowing user to override.
> 
> /Bruce

I'll change it so that the per-arch (and, for arm, per processor type) defaults are used when the options are set to 0. I'd also like to give users the option to use the discovery mechanism - we'll need another value with a specific meaning, and since we can't use positive integers, I guess the best we can do is use -1 for this.
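
As a rough sketch of what I mean for config/meson.build, reusing the helper scripts from this RFC (the -1 handling and the arch_default_max_lcores name are placeholders, not the final patch):

    max_lcores = get_option('max_lcores')
    if max_lcores == 0
        # 0: use the per-arch (or, for arm, per-SoC) default filled in
        # by the arch subdir (placeholder variable name)
        max_lcores = arch_default_max_lcores
    elif max_lcores == -1 and not meson.is_cross_build()
        # -1: explicit opt-in to detect the build machine's core count
        max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
    endif
    dpdk_conf.set('RTE_MAX_LCORE', max_lcores)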

Juraj
  
Bruce Richardson Oct. 27, 2020, 10:30 a.m. UTC | #6
On Fri, Oct 23, 2020 at 10:07:17AM +0000, Juraj Linkeš wrote:
> 
> 
> > -----Original Message-----
> > From: Bruce Richardson <bruce.richardson@intel.com>
> > Sent: Wednesday, October 21, 2020 4:28 PM
> > To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> > Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> > jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [RFC PATCH v3 3/6] build: automatic NUMA and cpu
> > counts detection
> > 
<snip>
> > What could possibly work is to set the defaults for these to "0" as done in your
> > patch, but thereafter have the resulting defaults set per-architecture, rather
> > than globally. That would allow x86 to tune things more for native-style builds,
> > while in all cases allowing user to override.
> > 
> > /Bruce
> 
> I'll change it so that per-arch (and for arm, per processor type) defaults are the defaults that will be used (when set to 0). I'd like to give users the option to use the discovery mechanism - I think we'll have to use another value with specific meaning and we can't use positive integers so I guess the best we can do is use -1 for this.
> 
Well, if the defaults are set per-architecture, you could use detection
for the default values in the ARM builds when the build-type is set to
"native" and max_cores etc. are set to zero. For x86 that is almost
certainly not what we want, so we'd use hard-coded values there, but
there's no reason all other architectures need to do the same.
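
A minimal sketch of that idea, reusing the helper script from this RFC
(the condition and names are only illustrative):

    # only arm native builds opt in to detection for their defaults
    if max_lcores == 0 and not meson.is_cross_build()
        if host_machine.cpu_family() == 'aarch64' and machine == 'native'
            max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
        else
            max_lcores = 128    # e.g. keep the hard-coded default on x86
        endif
    endif
    dpdk_conf.set('RTE_MAX_LCORE', max_lcores)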

/Bruce
  
Honnappa Nagarahalli Oct. 29, 2020, 4:31 a.m. UTC | #7
<snip>

> > >
> > > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > > The build machine's number of cpus and numa nodes vary, resulting
> > > > in mismatched counts of RTE_MAX_LCORE and
> RTE_MAX_NUMA_NODES for
> > > many
> > > > builds. Automatically discover the host's numa and cpu counts to
> > > > remove this mismatch for native builds. Use current defaults for default
> builds.
> > > > Force the users to specify the counts for cross build in cross
> > > > files or on the command line.
> > > > Give users the option to override the discovery or values from
> > > > cross files by specifying them on the command line with
> > > > -Dmax_lcores and -Dmax_numa_nodes.
> > > >
> > > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > ---
> > > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > > >  buildtools/meson.build       |  2 ++
> > > >  config/meson.build           | 42
> ++++++++++++++++++++++++++++++++++--
> > > >  meson_options.txt            |  8 +++----
> > > >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode
> > > > 100644 buildtools/get_cpu_count.py  create mode 100644
> > > > buildtools/get_numa_count.py
> > > >
> > > > diff --git a/buildtools/get_cpu_count.py
> > > > b/buildtools/get_cpu_count.py new file mode 100644 index
> > > > 000000000..386f85f8b
> > > > --- /dev/null
> > > > +++ b/buildtools/get_cpu_count.py
> > > > @@ -0,0 +1,7 @@
> > > > +#!/usr/bin/python3
> > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > +PANTHEON.tech s.r.o.
> > > > +
> > > > +import os
> > > > +
> > > > +print(os.cpu_count())
> > > > diff --git a/buildtools/get_numa_count.py
> > > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > > 000000000..f0c49973a
> > > > --- /dev/null
> > > > +++ b/buildtools/get_numa_count.py
> > > > @@ -0,0 +1,22 @@
> > > > +#!/usr/bin/python3
> > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > +PANTHEON.tech s.r.o.
> > > > +
> > > > +import ctypes
> > > > +import glob
> > > > +import os
> > > > +import subprocess
> > > > +
> > > > +if os.name == 'posix':
> > > > +    if os.path.isdir('/sys/devices/system/node'):
> > > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > > +    else:
> > > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > > +capture_output=True).stdout)
> > > > +
> > > > +elif os.name == 'nt':
> > > > +    libkernel32 = ctypes.windll.kernel32
> > > > +
> > > > +    count = ctypes.c_ulong()
> > > > +
> > > > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > > +    print(count.value + 1)
> > > > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > > > 04808dabc..925e733b1 100644
> > > > --- a/buildtools/meson.build
> > > > +++ b/buildtools/meson.build
> > > > @@ -17,3 +17,5 @@ else
> > > >  endif
> > > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper =
> > > > py3 +
> > > > files('call-sphinx-build.py')
> > > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > > diff --git a/config/meson.build b/config/meson.build index
> > > > a57c8ae9e..c4477f977 100644
> > > > --- a/config/meson.build
> > > > +++ b/config/meson.build
> > > > @@ -74,7 +74,11 @@ endif
> > > >  # still being able to support the CPU features required for DPDK.
> > > >  # This can be bumped up by the DPDK project, but it can never be
> > > > an # invariant like 'native'
> > > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > > +get_option('max_numa_nodes')
> > > >  if machine == 'default'
> > > > +	max_numa_nodes = 4
> > > > +	max_lcores = 128
> > >
> > > This doesn't seem right, since you are overriding the user-specified
> > > values with hard-coded ones.
> > >
> >
> > I understand we're using the default build/generic to build portalbe dpdk
> distro packages, meaning the settings for those packages should always be
> the same, no? If not, what should the default/generic build be? And when
> would someone do a default/generic build with their values? It wouldn't be a
> default/generic anymore, right?
> >
> > > >  	if host_machine.cpu_family().startswith('x86')
> > > >  		# matches the old pre-meson build systems default
> > > >  		machine = 'corei7'
> > > > @@ -83,6 +87,22 @@ if machine == 'default'
> > > >  	elif host_machine.cpu_family().startswith('ppc')
> > > >  		machine = 'power8'
> > > >  	endif
> > > > +elif not meson.is_cross_build()
> > > > +	# find host core count and numa node count for native builds
> > > > +	if max_lcores == 0
> > > > +		max_lcores =
> > > run_command(get_cpu_count_cmd).stdout().to_int()
> > > > +		min_lcores = 2
> > > > +		if max_lcores < min_lcores
> > > > +			message('Found less than @0@ cores, building for
> > > @0@ cores'.format(min_lcores))
> > > > +			max_lcores = min_lcores
> > > > +		else
> > > > +			message('Found @0@ cores'.format(max_lcores))
> > > > +		endif
> > > > +	endif
> > > > +	if max_numa_nodes == 0
> > > > +		max_numa_nodes =
> > > run_command(get_numa_count_cmd).stdout().to_int()
> > > > +		message('Found @0@ numa
> nodes'.format(max_numa_nodes))
> > > > +	endif
> > > >  endif
> > > >
> > > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@
> > > > foreach
> > > > arg: warning_flags  endforeach
> > > >
> > > >  # set other values pulled from the build options
> > > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > > -dpdk_conf.set('RTE_MAX_NUMA_NODES',
> get_option('max_numa_nodes'))
> > > > +if not meson.is_cross_build()
> > > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> endif
> > >
> > > Rather than conditionally setting the value here, you should move
> > > the checks below up above this to simplify things.
> > >
> >
> > Do you mean the cross build checks? Those have to be after
> subdir(arch_subdir) so that we can override the values from cross files (as
> the commit message says).
> >
> > > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > > dpdk_conf.set('RTE_ENABLE_TRACE_FP',
> > > > get_option('enable_trace_fp')) @@
> > > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > > >  subdir(arch_subdir)
> > > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > > ','.join(compile_time_cpuflags))
> > > >
> > > > +# check that cpu and numa count is set in cross builds if
> > > > +meson.is_cross_build()
> > > > +    	if max_lcores > 0
> > > > +		# specified on the cmdline
> > > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > > +		error('Number of cores for cross build not specified in @0@
> > > subdir (e.g. in a cross-file) nor on the
> > > cmdline'.format(arch_subdir))
> > > > +	endif
> > > > +	if max_numa_nodes > 0
> > > > +		# specified on the cmdline
> > > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES',
> max_numa_nodes)
> > > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > > +		error('Number of numa nodes for cross build not specified in
> > > @0@ subdir (e.g. in a cross-file) nor on the
> > > cmdline'.format(arch_subdir))
> > > > +	endif
> > > > +endif
> > > > +
> > > >  # set the install path for the drivers
> > > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > > >
> > > > diff --git a/meson_options.txt b/meson_options.txt index
> > > > 9bf18ab6b..01b0c45c3 100644
> > > > --- a/meson_options.txt
> > > > +++ b/meson_options.txt
> > > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> > > >  	description: 'set the target machine type')
> > > > option('max_ethports',
> > > > type: 'integer', value: 32,
> > > >  	description: 'maximum number of Ethernet devices')
> > > > -option('max_lcores', type: 'integer', value: 128,
> > > > -	description: 'maximum number of cores/threads supported by EAL')
> > > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > > -	description: 'maximum number of NUMA nodes supported by EAL')
> > > > +option('max_lcores', type: 'integer', value: 0,
> > > > +	description: 'maximum number of cores/threads supported by EAL.
> > > > +Value 0 means the number of cpus on the host will be used. For
> > > > +cross build,
> > > set to non-zero to overwrite the cross-file value.')
> > > option('max_numa_nodes',
> > > type: 'integer', value: 0,
> > > > +	description: 'maximum number of NUMA nodes supported by EAL.
> > > > +Value
> > > 0
> > > > +means the number of numa nodes on the host will be used. For
> > > > +cross build, set to non-zero to overwrite the cross-file value.')
> > >
> > > I don't like this change, because it very much assumes for
> > > non-cross-compiles that people will be running DPDK on the system
> > > they build it on. That's a very, very big assumption!
> >
> > I'll be using definitions from https://mesonbuild.com/Cross-
> compilation.html.
> > I understand cross compilation to be building for a diffent host machine
> than the build machine (which is aligned with pretty much every definition I
> found). I understand this to be true not only for builds between
> architectures, but also within an architecture (e.g. x86_64 build machine
> building for x86_64 host machine).
> > So yes, when someone does a native build, it stands to reason they want to
> use it on the build machine. If they wanted to use it elsewhere, they would
> cross compile.
> > Another thing is the current build philosophy is to detect as much as
> possible (not having statically defined configuration, as you mentioned in the
> past). Detecting the number of cores and numa nodes fits this perfectly.
> > And yet another thing is that the assumption seems to be already present
> in the build system - it already detects a lot things, some of which may not be
> satisfied on machines other than the build machine. I may be wrong about
> this.
> >
> > > I'm ok with having zero as a "detect" option, and having the values
> > > overridden from cross-files, but not with detection as the default
> > > out- of-the-box option! Lots of users may pull builds from a CI
> > > based on VMs with just a few cores, for instance.
> >
> > If not having the automatic detection is a concern because of users using CI
> builds, then we (if it's from our CI) can change what we're building in CI - the
> default/generic build seems like a good fit because it's supposed to work on
> a variety of systems. Expecting that native build from random VMs would
> work anywhere doesn't seen very realistic - it's been build for that VM
> environment (because it's a native build).
> >
> > Here's my understanding on which the current version is based:
> > 1. Since we want to get away from having statically defined configuration,
> numa and core count discovery is exactly what we should have in the build
> system. Since discorery is currently the default for lib/drivers, it stands to
> reason it should be default for everything else, if possible.
> > 2. Native build should produce binaries matching the build machine as well
> as possible.
> > 3. Default/generic build should produce binaries executable on a range of
> systems (ideally all systems of a given architecture).
> > 4. Other builds, that is non-native builds, are cross-compilation, since we're
> building for host machine other that the build machine.
> >
> > What I haven't taken into account is users using CI builds. That could be
> remedied by modifying the CI a bit while being consistent with what
> native/default/generic/cross builds are (or should be). And in any case, if
> we're not interested in testing the exact CI environment (which we aren't,
> since we don't want to use 2 cores with 1 numa), we really shouldn't be doing
> native builds there.
> >
> > I'm interested in hearing where my thinking deviates from yours.
> >
> 
> There are a number of points in which we differ, I think.
> 
> Firstly, the use of "native" and "default/generic" for the "machine"
> parameter refers only to the instruction-set level from the compiler, and
> should not affect any other settings, since all settings are independent.
> Therefore, setting "machine" to "native" does not mean that we should
> detect cores and numa nodes, and similarly setting it to "default" does not
> mean that we should ignore the settings for these values and pick our own
> chosen default values.
Apologies for going back to an older discussion.
I am trying to understand the definitions/expectations for the 'native' and 'generic' builds.
As you say, the instruction-set level is definitely one parameter. But I think other DPDK-specific parameters should also be considered.
For example, RTE_MAX_LCORE should have a particular value for the 'generic' build on all the supported architectures. The value could be different for each architecture, but it is fixed for the 'generic' build of a given architecture. Otherwise, the 'generic' build might not run on all the machines of that architecture.

Similarly, for the 'native' build, is there any reason not to include other DPDK parameters as part of the definition? IMO, 'native' should refer to the entire build machine, not just the ISA, i.e. the build is done on the target machine.

> 
> Secondly, the use of cross-compilation only applies when you are compiling
> for a different architecture or environment (e.g. OS) to what you are building
> on. Building on a 4-core x86 machine to run on a dual-socket, 32-core x86
> machine is not cross-compiling, but still needs to work by default. Something
> like building a 32-bit binary on a 64-bit OS is in most cases not done by cross-
> compilation either, but is rather outside the scope of the discussion, except
> as a reference point to show the scope of differences which can be
> accomodated as "native builds".
> 
> In terms of dynamic configuration for things like cores and numa nodes, the
> ideal end state here is not to have them detected at build time on the host
> system, but instead to have them detected at runtime and sized dynamically.
> In the absense of that, these values should be set to reasonable defaults so
> that when a user compiles up a binary without settings these explicitly it
> should run on 95%+ of systems of that type.
For the 'generic' build (which I also read as the DPDK binaries packaged by distros), this should be 100%.

> 
> This is my understanding of the issues, anyway. :-)
> 
> Regards,
> /Bruce
  
Bruce Richardson Nov. 2, 2020, 1:55 p.m. UTC | #8
On Thu, Oct 29, 2020 at 04:31:33AM +0000, Honnappa Nagarahalli wrote:
> <snip>
> 
> > > >
> > > > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > > > The build machine's number of cpus and numa nodes vary, resulting
> > > > > in mismatched counts of RTE_MAX_LCORE and
> > RTE_MAX_NUMA_NODES for
> > > > many
> > > > > builds. Automatically discover the host's numa and cpu counts to
> > > > > remove this mismatch for native builds. Use current defaults for default
> > builds.
> > > > > Force the users to specify the counts for cross build in cross
> > > > > files or on the command line.
> > > > > Give users the option to override the discovery or values from
> > > > > cross files by specifying them on the command line with
> > > > > -Dmax_lcores and -Dmax_numa_nodes.
> > > > >
> > > > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > > ---
> > > > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > > > >  buildtools/meson.build       |  2 ++
> > > > >  config/meson.build           | 42
> > ++++++++++++++++++++++++++++++++++--
> > > > >  meson_options.txt            |  8 +++----
> > > > >  5 files changed, 75 insertions(+), 6 deletions(-)  create mode
> > > > > 100644 buildtools/get_cpu_count.py  create mode 100644
> > > > > buildtools/get_numa_count.py
> > > > >
> > > > > diff --git a/buildtools/get_cpu_count.py
> > > > > b/buildtools/get_cpu_count.py new file mode 100644 index
> > > > > 000000000..386f85f8b
> > > > > --- /dev/null
> > > > > +++ b/buildtools/get_cpu_count.py
> > > > > @@ -0,0 +1,7 @@
> > > > > +#!/usr/bin/python3
> > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > +PANTHEON.tech s.r.o.
> > > > > +
> > > > > +import os
> > > > > +
> > > > > +print(os.cpu_count())
> > > > > diff --git a/buildtools/get_numa_count.py
> > > > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > > > 000000000..f0c49973a
> > > > > --- /dev/null
> > > > > +++ b/buildtools/get_numa_count.py
> > > > > @@ -0,0 +1,22 @@
> > > > > +#!/usr/bin/python3
> > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > +PANTHEON.tech s.r.o.
> > > > > +
> > > > > +import ctypes
> > > > > +import glob
> > > > > +import os
> > > > > +import subprocess
> > > > > +
> > > > > +if os.name == 'posix':
> > > > > +    if os.path.isdir('/sys/devices/system/node'):
> > > > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > > > +    else:
> > > > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > > > +capture_output=True).stdout)
> > > > > +
> > > > > +elif os.name == 'nt':
> > > > > +    libkernel32 = ctypes.windll.kernel32
> > > > > +
> > > > > +    count = ctypes.c_ulong()
> > > > > +
> > > > > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > > > +    print(count.value + 1)
> > > > > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > > > > 04808dabc..925e733b1 100644
> > > > > --- a/buildtools/meson.build
> > > > > +++ b/buildtools/meson.build
> > > > > @@ -17,3 +17,5 @@ else
> > > > >  endif
> > > > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper =
> > > > > py3 +
> > > > > files('call-sphinx-build.py')
> > > > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > > > diff --git a/config/meson.build b/config/meson.build index
> > > > > a57c8ae9e..c4477f977 100644
> > > > > --- a/config/meson.build
> > > > > +++ b/config/meson.build
> > > > > @@ -74,7 +74,11 @@ endif
> > > > >  # still being able to support the CPU features required for DPDK.
> > > > >  # This can be bumped up by the DPDK project, but it can never be
> > > > > an # invariant like 'native'
> > > > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > > > +get_option('max_numa_nodes')
> > > > >  if machine == 'default'
> > > > > +	max_numa_nodes = 4
> > > > > +	max_lcores = 128
> > > >
> > > > This doesn't seem right, since you are overriding the user-specified
> > > > values with hard-coded ones.
> > > >
> > >
> > > I understand we're using the default build/generic to build portalbe dpdk
> > distro packages, meaning the settings for those packages should always be
> > the same, no? If not, what should the default/generic build be? And when
> > would someone do a default/generic build with their values? It wouldn't be a
> > default/generic anymore, right?
> > >
> > > > >  	if host_machine.cpu_family().startswith('x86')
> > > > >  		# matches the old pre-meson build systems default
> > > > >  		machine = 'corei7'
> > > > > @@ -83,6 +87,22 @@ if machine == 'default'
> > > > >  	elif host_machine.cpu_family().startswith('ppc')
> > > > >  		machine = 'power8'
> > > > >  	endif
> > > > > +elif not meson.is_cross_build()
> > > > > +	# find host core count and numa node count for native builds
> > > > > +	if max_lcores == 0
> > > > > +		max_lcores =
> > > > run_command(get_cpu_count_cmd).stdout().to_int()
> > > > > +		min_lcores = 2
> > > > > +		if max_lcores < min_lcores
> > > > > +			message('Found less than @0@ cores, building for
> > > > @0@ cores'.format(min_lcores))
> > > > > +			max_lcores = min_lcores
> > > > > +		else
> > > > > +			message('Found @0@ cores'.format(max_lcores))
> > > > > +		endif
> > > > > +	endif
> > > > > +	if max_numa_nodes == 0
> > > > > +		max_numa_nodes =
> > > > run_command(get_numa_count_cmd).stdout().to_int()
> > > > > +		message('Found @0@ numa
> > nodes'.format(max_numa_nodes))
> > > > > +	endif
> > > > >  endif
> > > > >
> > > > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@
> > > > > foreach
> > > > > arg: warning_flags  endforeach
> > > > >
> > > > >  # set other values pulled from the build options
> > > > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > > > -dpdk_conf.set('RTE_MAX_NUMA_NODES',
> > get_option('max_numa_nodes'))
> > > > > +if not meson.is_cross_build()
> > > > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> > endif
> > > >
> > > > Rather than conditionally setting the value here, you should move
> > > > the checks below up above this to simplify things.
> > > >
> > >
> > > Do you mean the cross build checks? Those have to be after
> > subdir(arch_subdir) so that we can override the values from cross files (as
> > the commit message says).
> > >
> > > > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > > > dpdk_conf.set('RTE_ENABLE_TRACE_FP',
> > > > > get_option('enable_trace_fp')) @@
> > > > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > > > >  subdir(arch_subdir)
> > > > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > > > ','.join(compile_time_cpuflags))
> > > > >
> > > > > +# check that cpu and numa count is set in cross builds if
> > > > > +meson.is_cross_build()
> > > > > +    	if max_lcores > 0
> > > > > +		# specified on the cmdline
> > > > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > > > +		error('Number of cores for cross build not specified in @0@
> > > > subdir (e.g. in a cross-file) nor on the
> > > > cmdline'.format(arch_subdir))
> > > > > +	endif
> > > > > +	if max_numa_nodes > 0
> > > > > +		# specified on the cmdline
> > > > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES',
> > max_numa_nodes)
> > > > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > > > +		error('Number of numa nodes for cross build not specified in
> > > > @0@ subdir (e.g. in a cross-file) nor on the
> > > > cmdline'.format(arch_subdir))
> > > > > +	endif
> > > > > +endif
> > > > > +
> > > > >  # set the install path for the drivers
> > > > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > > > >
> > > > > diff --git a/meson_options.txt b/meson_options.txt index
> > > > > 9bf18ab6b..01b0c45c3 100644
> > > > > --- a/meson_options.txt
> > > > > +++ b/meson_options.txt
> > > > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value: 'native',
> > > > >  	description: 'set the target machine type')
> > > > > option('max_ethports',
> > > > > type: 'integer', value: 32,
> > > > >  	description: 'maximum number of Ethernet devices')
> > > > > -option('max_lcores', type: 'integer', value: 128,
> > > > > -	description: 'maximum number of cores/threads supported by EAL')
> > > > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > > > -	description: 'maximum number of NUMA nodes supported by EAL')
> > > > > +option('max_lcores', type: 'integer', value: 0,
> > > > > +	description: 'maximum number of cores/threads supported by EAL.
> > > > > +Value 0 means the number of cpus on the host will be used. For
> > > > > +cross build,
> > > > set to non-zero to overwrite the cross-file value.')
> > > > option('max_numa_nodes',
> > > > type: 'integer', value: 0,
> > > > > +	description: 'maximum number of NUMA nodes supported by EAL.
> > > > > +Value
> > > > 0
> > > > > +means the number of numa nodes on the host will be used. For
> > > > > +cross build, set to non-zero to overwrite the cross-file value.')
> > > >
> > > > I don't like this change, because it very much assumes for
> > > > non-cross-compiles that people will be running DPDK on the system
> > > > they build it on. That's a very, very big assumption!
> > >
> > > I'll be using definitions from https://mesonbuild.com/Cross-
> > compilation.html.
> > > I understand cross compilation to be building for a diffent host machine
> > than the build machine (which is aligned with pretty much every definition I
> > found). I understand this to be true not only for builds between
> > architectures, but also within an architecture (e.g. x86_64 build machine
> > building for x86_64 host machine).
> > > So yes, when someone does a native build, it stands to reason they want to
> > use it on the build machine. If they wanted to use it elsewhere, they would
> > cross compile.
> > > Another thing is the current build philosophy is to detect as much as
> > possible (not having statically defined configuration, as you mentioned in the
> > past). Detecting the number of cores and numa nodes fits this perfectly.
> > > And yet another thing is that the assumption seems to be already present
> > in the build system - it already detects a lot things, some of which may not be
> > satisfied on machines other than the build machine. I may be wrong about
> > this.
> > >
> > > > I'm ok with having zero as a "detect" option, and having the values
> > > > overridden from cross-files, but not with detection as the default
> > > > out- of-the-box option! Lots of users may pull builds from a CI
> > > > based on VMs with just a few cores, for instance.
> > >
> > > If not having the automatic detection is a concern because of users using CI
> > builds, then we (if it's from our CI) can change what we're building in CI - the
> > default/generic build seems like a good fit because it's supposed to work on
> > a variety of systems. Expecting that native build from random VMs would
> > work anywhere doesn't seen very realistic - it's been build for that VM
> > environment (because it's a native build).
> > >
> > > Here's my understanding on which the current version is based:
> > > 1. Since we want to get away from having statically defined configuration,
> > numa and core count discovery is exactly what we should have in the build
> > system. Since discorery is currently the default for lib/drivers, it stands to
> > reason it should be default for everything else, if possible.
> > > 2. Native build should produce binaries matching the build machine as well
> > as possible.
> > > 3. Default/generic build should produce binaries executable on a range of
> > systems (ideally all systems of a given architecture).
> > > 4. Other builds, that is non-native builds, are cross-compilation, since we're
> > building for host machine other that the build machine.
> > >
> > > What I haven't taken into account is users using CI builds. That could be
> > remedied by modifying the CI a bit while being consistent with what
> > native/default/generic/cross builds are (or should be). And in any case, if
> > we're not interested in testing the exact CI environment (which we aren't,
> > since we don't want to use 2 cores with 1 numa), we really shouldn't be doing
> > native builds there.
> > >
> > > I'm interested in hearing where my thinking deviates from yours.
> > >
> > 
> > There are a number of points in which we differ, I think.
> > 
> > Firstly, the use of "native" and "default/generic" for the "machine"
> > parameter refers only to the instruction-set level from the compiler, and
> > should not affect any other settings, since all settings are independent.
> > Therefore, setting "machine" to "native" does not mean that we should
> > detect cores and numa nodes, and similarly setting it to "default" does not
> > mean that we should ignore the settings for these values and pick our own
> > chosen default values.
> Apologies to go to an older discussion.
> I am trying to understand the definitions/expectations for 'native' and 'generic' builds.
> As you say, instruction-set level is definitely one parameter.

Part of the confusion arises from the fact that originally that was the
only parameter set by this - and on x86 it still is. Perhaps this parameter
needs to be renamed to "isa-level" or "architecture-flag" or similar to
reflect its meaning. This would then allow a new "machine" setting, which
can be considered separately. The question then is how much that helps with
the main issue under discussion, that of cores and numa node values.

> But, I think other DPDK specific parameters should also be considered.
> For ex: RTE_MAX_LCORE should have a particular value for 'generic' build in all the supported architectures. The value could be different for each architecture, but it is fixed for the 'generic' build for a given architecture. Otherwise, the 'generic' build might not run on all the machines of that architecture.
> 
> Similarly, for 'native' build, is there any reason not to include other DPDK parameters as part of the definition? IMO, 'native' should refer to the entire build machine, not just the ISA. i.e. build on the target machine.
> 

While I understand the idea here, it is somewhat complicated by the fact
that the meson options given in "meson_options.txt" cannot be set by meson
code, which means that when we change the machine flag to "native" we can
only use or ignore the user-provided lcores and numa nodes setting - we
have no way to change them and reflect those changes back to the user. :-(
This leads to the situation in the discussion in this thread, where we
start needing all sorts of magic values to indicate use of machine-type
defaults or detected defaults.
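
To illustrate the constraint, roughly the best we can do in meson code is
something like the following (sketch only):

    opt = get_option('max_lcores')   # read-only; there is no set_option()
    if opt > 0
        max_lcores = opt             # explicit user value
    else
        max_lcores = 128             # "magic" 0: substitute a default or
                                     # detected value here
    endif
    # the resolved value lives only in dpdk_conf; 'meson configure' will
    # keep showing 0 for the option itself
    dpdk_conf.set('RTE_MAX_LCORE', max_lcores)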

Regards,
/Bruce
  
Honnappa Nagarahalli Nov. 2, 2020, 7:01 p.m. UTC | #9
<snip>

> >
> > > > >
> > > > > On Wed, Oct 21, 2020 at 01:37:38PM +0200, Juraj Linkeš wrote:
> > > > > > The build machine's number of cpus and numa nodes vary,
> > > > > > resulting in mismatched counts of RTE_MAX_LCORE and
> > > RTE_MAX_NUMA_NODES for
> > > > > many
> > > > > > builds. Automatically discover the host's numa and cpu counts
> > > > > > to remove this mismatch for native builds. Use current
> > > > > > defaults for default
> > > builds.
> > > > > > Force the users to specify the counts for cross build in cross
> > > > > > files or on the command line.
> > > > > > Give users the option to override the discovery or values from
> > > > > > cross files by specifying them on the command line with
> > > > > > -Dmax_lcores and -Dmax_numa_nodes.
> > > > > >
> > > > > > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > > > > > ---
> > > > > >  buildtools/get_cpu_count.py  |  7 ++++++
> > > > > > buildtools/get_numa_count.py | 22 +++++++++++++++++++
> > > > > >  buildtools/meson.build       |  2 ++
> > > > > >  config/meson.build           | 42
> > > ++++++++++++++++++++++++++++++++++--
> > > > > >  meson_options.txt            |  8 +++----
> > > > > >  5 files changed, 75 insertions(+), 6 deletions(-)  create
> > > > > > mode
> > > > > > 100644 buildtools/get_cpu_count.py  create mode 100644
> > > > > > buildtools/get_numa_count.py
> > > > > >
> > > > > > diff --git a/buildtools/get_cpu_count.py
> > > > > > b/buildtools/get_cpu_count.py new file mode 100644 index
> > > > > > 000000000..386f85f8b
> > > > > > --- /dev/null
> > > > > > +++ b/buildtools/get_cpu_count.py
> > > > > > @@ -0,0 +1,7 @@
> > > > > > +#!/usr/bin/python3
> > > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > > +PANTHEON.tech s.r.o.
> > > > > > +
> > > > > > +import os
> > > > > > +
> > > > > > +print(os.cpu_count())
> > > > > > diff --git a/buildtools/get_numa_count.py
> > > > > > b/buildtools/get_numa_count.py new file mode 100644 index
> > > > > > 000000000..f0c49973a
> > > > > > --- /dev/null
> > > > > > +++ b/buildtools/get_numa_count.py
> > > > > > @@ -0,0 +1,22 @@
> > > > > > +#!/usr/bin/python3
> > > > > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > > > > > +PANTHEON.tech s.r.o.
> > > > > > +
> > > > > > +import ctypes
> > > > > > +import glob
> > > > > > +import os
> > > > > > +import subprocess
> > > > > > +
> > > > > > +if os.name == 'posix':
> > > > > > +    if os.path.isdir('/sys/devices/system/node'):
> > > > > > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > > > > > +    else:
> > > > > > +        print(subprocess.run(['sysctl', 'vm.ndomains'],
> > > > > > +capture_output=True).stdout)
> > > > > > +
> > > > > > +elif os.name == 'nt':
> > > > > > +    libkernel32 = ctypes.windll.kernel32
> > > > > > +
> > > > > > +    count = ctypes.c_ulong()
> > > > > > +
> > > > > > +
> libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > > > > > +    print(count.value + 1)
> > > > > > diff --git a/buildtools/meson.build b/buildtools/meson.build
> > > > > > index
> > > > > > 04808dabc..925e733b1 100644
> > > > > > --- a/buildtools/meson.build
> > > > > > +++ b/buildtools/meson.build
> > > > > > @@ -17,3 +17,5 @@ else
> > > > > >  endif
> > > > > >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper
> > > > > > =
> > > > > > py3 +
> > > > > > files('call-sphinx-build.py')
> > > > > > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > > > > > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > > > > > diff --git a/config/meson.build b/config/meson.build index
> > > > > > a57c8ae9e..c4477f977 100644
> > > > > > --- a/config/meson.build
> > > > > > +++ b/config/meson.build
> > > > > > @@ -74,7 +74,11 @@ endif
> > > > > >  # still being able to support the CPU features required for DPDK.
> > > > > >  # This can be bumped up by the DPDK project, but it can never
> > > > > > be an # invariant like 'native'
> > > > > > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > > > > > +get_option('max_numa_nodes')
> > > > > >  if machine == 'default'
> > > > > > +	max_numa_nodes = 4
> > > > > > +	max_lcores = 128
> > > > >
> > > > > This doesn't seem right, since you are overriding the
> > > > > user-specified values with hard-coded ones.
> > > > >
> > > >
> > > > I understand we're using the default build/generic to build
> > > > portalbe dpdk
> > > distro packages, meaning the settings for those packages should
> > > always be the same, no? If not, what should the default/generic
> > > build be? And when would someone do a default/generic build with
> > > their values? It wouldn't be a default/generic anymore, right?
> > > >
> > > > > >  	if host_machine.cpu_family().startswith('x86')
> > > > > >  		# matches the old pre-meson build systems default
> > > > > >  		machine = 'corei7'
> > > > > > @@ -83,6 +87,22 @@ if machine == 'default'
> > > > > >  	elif host_machine.cpu_family().startswith('ppc')
> > > > > >  		machine = 'power8'
> > > > > >  	endif
> > > > > > +elif not meson.is_cross_build()
> > > > > > +	# find host core count and numa node count for native builds
> > > > > > +	if max_lcores == 0
> > > > > > +		max_lcores =
> > > > > run_command(get_cpu_count_cmd).stdout().to_int()
> > > > > > +		min_lcores = 2
> > > > > > +		if max_lcores < min_lcores
> > > > > > +			message('Found less than @0@ cores,
> building for
> > > > > @0@ cores'.format(min_lcores))
> > > > > > +			max_lcores = min_lcores
> > > > > > +		else
> > > > > > +			message('Found @0@
> cores'.format(max_lcores))
> > > > > > +		endif
> > > > > > +	endif
> > > > > > +	if max_numa_nodes == 0
> > > > > > +		max_numa_nodes =
> > > > > run_command(get_numa_count_cmd).stdout().to_int()
> > > > > > +		message('Found @0@ numa
> > > nodes'.format(max_numa_nodes))
> > > > > > +	endif
> > > > > >  endif
> > > > > >
> > > > > >  dpdk_conf.set('RTE_MACHINE', machine) @@ -227,8 +247,10 @@
> > > > > > foreach
> > > > > > arg: warning_flags  endforeach
> > > > > >
> > > > > >  # set other values pulled from the build options
> > > > > > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > > > > > -dpdk_conf.set('RTE_MAX_NUMA_NODES',
> > > get_option('max_numa_nodes'))
> > > > > > +if not meson.is_cross_build()
> > > > > > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > > +	dpdk_conf.set('RTE_MAX_NUMA_NODES',
> max_numa_nodes)
> > > endif
> > > > >
> > > > > Rather than conditionally setting the value here, you should
> > > > > move the checks below up above this to simplify things.
> > > > >
> > > >
> > > > Do you mean the cross build checks? Those have to be after
> > > subdir(arch_subdir) so that we can override the values from cross
> > > files (as the commit message says).
> > > >
> > > > > >  dpdk_conf.set('RTE_MAX_ETHPORTS',
> get_option('max_ethports'))
> > > > > > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > > > > > dpdk_conf.set('RTE_ENABLE_TRACE_FP',
> > > > > > get_option('enable_trace_fp')) @@
> > > > > > -247,6 +269,22 @@ compile_time_cpuflags = []
> > > > > >  subdir(arch_subdir)
> > > > > >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > > > > > ','.join(compile_time_cpuflags))
> > > > > >
> > > > > > +# check that cpu and numa count is set in cross builds if
> > > > > > +meson.is_cross_build()
> > > > > > +    	if max_lcores > 0
> > > > > > +		# specified on the cmdline
> > > > > > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > > > > > +	elif not dpdk_conf.has('RTE_MAX_LCORE')
> > > > > > +		error('Number of cores for cross build not specified in
> @0@
> > > > > subdir (e.g. in a cross-file) nor on the
> > > > > cmdline'.format(arch_subdir))
> > > > > > +	endif
> > > > > > +	if max_numa_nodes > 0
> > > > > > +		# specified on the cmdline
> > > > > > +		dpdk_conf.set('RTE_MAX_NUMA_NODES',
> > > max_numa_nodes)
> > > > > > +	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > > > > > +		error('Number of numa nodes for cross build not
> specified
> > > > > > +in
> > > > > @0@ subdir (e.g. in a cross-file) nor on the
> > > > > cmdline'.format(arch_subdir))
> > > > > > +	endif
> > > > > > +endif
> > > > > > +
> > > > > >  # set the install path for the drivers
> > > > > > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> > > > > >
> > > > > > diff --git a/meson_options.txt b/meson_options.txt index
> > > > > > 9bf18ab6b..01b0c45c3 100644
> > > > > > --- a/meson_options.txt
> > > > > > +++ b/meson_options.txt
> > > > > > @@ -26,10 +26,10 @@ option('machine', type: 'string', value:
> 'native',
> > > > > >  	description: 'set the target machine type')
> > > > > > option('max_ethports',
> > > > > > type: 'integer', value: 32,
> > > > > >  	description: 'maximum number of Ethernet devices')
> > > > > > -option('max_lcores', type: 'integer', value: 128,
> > > > > > -	description: 'maximum number of cores/threads supported
> by EAL')
> > > > > > -option('max_numa_nodes', type: 'integer', value: 4,
> > > > > > -	description: 'maximum number of NUMA nodes supported
> by EAL')
> > > > > > +option('max_lcores', type: 'integer', value: 0,
> > > > > > +	description: 'maximum number of cores/threads supported
> by EAL.
> > > > > > +Value 0 means the number of cpus on the host will be used.
> > > > > > +For cross build,
> > > > > set to non-zero to overwrite the cross-file value.')
> > > > > option('max_numa_nodes',
> > > > > type: 'integer', value: 0,
> > > > > > +	description: 'maximum number of NUMA nodes supported
> by EAL.
> > > > > > +Value
> > > > > 0
> > > > > > +means the number of numa nodes on the host will be used. For
> > > > > > +cross build, set to non-zero to overwrite the cross-file
> > > > > > +value.')
> > > > >
> > > > > I don't like this change, because it very much assumes for
> > > > > non-cross-compiles that people will be running DPDK on the
> > > > > system they build it on. That's a very, very big assumption!
> > > >
> > > > I'll be using definitions from https://mesonbuild.com/Cross-
> > > compilation.html.
> > > > I understand cross compilation to be building for a diffent host
> > > > machine
> > > than the build machine (which is aligned with pretty much every
> > > definition I found). I understand this to be true not only for
> > > builds between architectures, but also within an architecture (e.g.
> > > x86_64 build machine building for x86_64 host machine).
> > > > So yes, when someone does a native build, it stands to reason they
> > > > want to
> > > use it on the build machine. If they wanted to use it elsewhere,
> > > they would cross compile.
> > > > Another thing is the current build philosophy is to detect as much
> > > > as
> > > possible (not having statically defined configuration, as you
> > > mentioned in the past). Detecting the number of cores and numa nodes
> fits this perfectly.
> > > > And yet another thing is that the assumption seems to be already
> > > > present
> > > in the build system - it already detects a lot things, some of which
> > > may not be satisfied on machines other than the build machine. I may
> > > be wrong about this.
> > > >
> > > > > I'm ok with having zero as a "detect" option, and having the
> > > > > values overridden from cross-files, but not with detection as
> > > > > the default
> > > > > out- of-the-box option! Lots of users may pull builds from a CI
> > > > > based on VMs with just a few cores, for instance.
> > > >
> > > > If not having the automatic detection is a concern because of
> > > > users using CI
> > > builds, then we (if it's from our CI) can change what we're building
> > > in CI - the default/generic build seems like a good fit because it's
> > > supposed to work on a variety of systems. Expecting that native
> > > build from random VMs would work anywhere doesn't seen very
> > > realistic - it's been build for that VM environment (because it's a native
> build).
> > > >
> > > > Here's my understanding on which the current version is based:
> > > > 1. Since we want to get away from having statically defined
> > > > configuration,
> > > numa and core count discovery is exactly what we should have in the
> > > build system. Since discorery is currently the default for
> > > lib/drivers, it stands to reason it should be default for everything else, if
> possible.
> > > > 2. Native build should produce binaries matching the build machine
> > > > as well
> > > as possible.
> > > > 3. Default/generic build should produce binaries executable on a
> > > > range of
> > > systems (ideally all systems of a given architecture).
> > > > 4. Other builds, that is non-native builds, are cross-compilation,
> > > > since we're
> > > building for host machine other that the build machine.
> > > >
> > > > What I haven't taken into account is users using CI builds. That
> > > > could be
> > > remedied by modifying the CI a bit while being consistent with what
> > > native/default/generic/cross builds are (or should be). And in any
> > > case, if we're not interested in testing the exact CI environment
> > > (which we aren't, since we don't want to use 2 cores with 1 numa),
> > > we really shouldn't be doing native builds there.
> > > >
> > > > I'm interested in hearing where my thinking deviates from yours.
> > > >
> > >
> > > There are a number of points in which we differ, I think.
> > >
> > > Firstly, the use of "native" and "default/generic" for the "machine"
> > > parameter refers only to the instruction-set level from the
> > > compiler, and should not affect any other settings, since all settings are
> independent.
> > > Therefore, setting "machine" to "native" does not mean that we
> > > should detect cores and numa nodes, and similarly setting it to
> > > "default" does not mean that we should ignore the settings for these
> > > values and pick our own chosen default values.
> > Apologies to go to an older discussion.
> > I am trying to understand the definitions/expectations for 'native' and
> 'generic' builds.
> > As you say, instruction-set level is definitely one parameter.
> 
> Part of the confusion arises from the fact that originally that was the only
> parameter set by this - and on x86 it still is. Perhaps this parameter needs to
Just wondering: for x86, what does it mean if we set max_num_cores and max_numa_nodes based on dynamic detection for the 'native' build?
The ISA still remains the same as before, but the build might not work on machines with a higher number of cores or numa nodes.
At the same time, the build also might not work on a machine with a different ISA. Users need to be aware that the target machine must have the same ISA and no more cores/numa nodes than the build machine.

> be renamed to "isa-level" or "architecture-flag" or similar to reflect its
> meaning. This would then allow a new "machine" setting, which can be
> considered separately. The question then is how much that helps with the
> main issue under discussion, that of cores and numa node values.
If we rename it, we will have a backward compatibility issue (i.e. a 'native' build on x86 will have a different meaning, and whoever wants the original meaning will have to change to using the new name). I'm not sure about the complexity in the meson scripts.


> 
> > But, I think other DPDK specific parameters should also be considered.
> > For ex: RTE_MAX_LCORE should have a particular value for 'generic' build in
> all the supported architectures. The value could be different for each
> architecture, but it is fixed for the 'generic' build for a given architecture.
> Otherwise, the 'generic' build might not run on all the machines of that
> architecture.
> >
> > Similarly, for 'native' build, is there any reason not to include other DPDK
> parameters as part of the definition? IMO, 'native' should refer to the entire
> build machine, not just the ISA. i.e. build on the target machine.
> >
> 
> While I understand the idea here, it is somewhat complicated by the fact that
> the meson options given in "meson_options.txt" cannot be set by meson
> code, which means that when we change the machine flag to "native" we
> can only use or ignore the user-provided lcores and numa nodes setting - we
> have no way to change them and reflect those changes back to the user. :-(
> This leads to the situation in the discussion in this thread, where we start
> needing all sorts of magic values to indicate use of machine-type defaults or
> detected defaults.
I am wondering why we need to take the max_num_cores and max_numa_nodes from the user? This option was not provided in the make build system. I ask this question because for 'generic' this has to be a static/known configuration. For cross builds, this info can come from (or be derived from) the cross build file.
Was it supposed to be used in conjunction with the 'native' build?
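
For illustration, one way these counts could be carried by a cross file and consumed by the per-arch meson code - the property names and the get_cross_property() lookup are only a sketch here, not what this patch or the current Arm config actually does:

    # cross file, [properties] section (illustrative key names):
    #     max_lcores = 16
    #     max_numa_nodes = 1

    # per-arch meson.build, reading the properties with a fallback:
    if meson.is_cross_build()
        dpdk_conf.set('RTE_MAX_LCORE',
                meson.get_cross_property('max_lcores', 128))
        dpdk_conf.set('RTE_MAX_NUMA_NODES',
                meson.get_cross_property('max_numa_nodes', 4))
    endif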

> 
> Regards,
> /Bruce
  
Bruce Richardson Nov. 3, 2020, 9:44 a.m. UTC | #10
On Mon, Nov 02, 2020 at 07:01:44PM +0000, Honnappa Nagarahalli wrote:
> <snip>
> 
> > >
> > Part of the confusion arises from the fact that originally that was the only
> > parameter set by this - and on x86 it still is. Perhaps this parameter needs to
> Just wondering, for x86, what does it mean if we set the max_num_cores and max_numa_nodes based on dynamic detection for 'native' build?
> ISA still remains the same as before. But, the build might not work on machines with higher number of cores and numa nodes.
> At the same time, the build also might not work on a machine with a different ISA. The users need to be aware that the target machine has the same ISA and same number of cores/numa nodes as the build machine.
> 
Yes, that is a fair summary.

> > be renamed to "isa-level" or "architecture-flag" or similar to reflect its
> > meaning. This would then allow a new "machine" setting, which can be
> > considered separately. The question then is how much that helps with the
> > main issue under discussion, that of cores and numa node values.
> If we rename it, we will have backward compatibility issue (i.e. 'native' build on x86 will have different meaning and whoever wants the original meaning, have to change to using this new name). Not sure about the complexity in meson scripts.
> 

Yep, it was just a thought to see if it could help in this situation.

> 
> > 
> > > But, I think other DPDK specific parameters should also be considered.
> > > For ex: RTE_MAX_LCORE should have a particular value for 'generic' build in
> > all the supported architectures. The value could be different for each
> > architecture, but it is fixed for the 'generic' build for a given architecture.
> > Otherwise, the 'generic' build might not run on all the machines of that
> > architecture.
> > >
> > > Similarly, for 'native' build, is there any reason not to include other DPDK
> > parameters as part of the definition? IMO, 'native' should refer to the entire
> > build machine, not just the ISA. i.e. build on the target machine.
> > >
> > 
> > While I understand the idea here, it is somewhat complicated by the fact that
> > the meson options given in "meson_options.txt" cannot be set by meson
> > code, which means that when we change the machine flag to "native" we
> > can only use or ignore the user-provided lcores and numa nodes setting - we
> > have no way to change them and reflect those changes back to the user. :-(
> > This leads to the situation in the discussion in this thread, where we start
> > needing all sorts of magic values to indicate use of machine-type defaults or
> > detected defaults.
> I am wondering why we need to take the max_num_cores and max_numa_nodes from the user? This option was not provided in the make build system. I ask this question because for 'generic' this has to be a static/known configuration. For cross builds, this info can come (or derived) from the cross build file.
> Was it supposed to be used in conjunction with 'native' build?
> 

Well, it was configurable in the build config files same as all other DPDK
build settings with make. When working first on meson, I felt it was a
setting the user might be likely to want to tune, which is why I put it
into the meson_options.txt and nobody suggested otherwise on review [which
is the reason why many of the current options are the way they are :-)].

From my side, I have a couple of unknowns:
1. How big a difference in terms of memory use etc. of DPDK does it make by
   having really big values for these core/numa counts? If there is not much
   difference, then there is indeed little value in having them configurable
   at all, and we should just use big defaults and be done with it.
2. If there is a noticable difference in these settings, how many users are
   going to want to actually go to the trouble of tweaking these?
3. How big an effort is it to switch to having these settings made entirely
   dynamic at runtime? Doing so would naturally make the need for these
   settings completely go away.

With all that said, I'd be ok with a number of solutions. I'm ok to have
these dropped as meson options and just have them specified in other ways,
e.g. cross-file, or from meson.build files. [For x86, I'd tend towards
having them defined in rte_config.h inside x86-specific ifdefs].
Alternatively, I'm also happy enough with the proposed scheme here of
allowing user override, with platform defaults using "0"-value and
detection using "-1".
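
To make that last scheme concrete, a minimal meson sketch of how config/meson.build could interpret the values as described here (illustrative only - note that the v3 patch below uses 0 to mean 'detect' for native builds rather than the 0/-1 split proposed above):

    max_lcores = get_option('max_lcores')
    if max_lcores == 0
        # 0: fall back to the platform/arch default (e.g. set in the arch subdir)
        max_lcores = 128
    elif max_lcores == -1 and not meson.is_cross_build()
        # -1: detect on the build machine
        max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
    endif
    dpdk_conf.set('RTE_MAX_LCORE', max_lcores)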

Regards,
/Bruce
  
Juraj Linkeš Nov. 5, 2020, 9:23 a.m. UTC | #11
> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Tuesday, November 3, 2020 10:45 AM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Cc: Juraj Linkeš <juraj.linkes@pantheon.tech>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Phil Yang <Phil.Yang@arm.com>;
> vcchunga@amazon.com; Dharmik Thakkar <Dharmik.Thakkar@arm.com>;
> jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org; nd
> <nd@arm.com>
> Subject: Re: [RFC PATCH v3 3/6] build: automatic NUMA and cpu counts
> detection
> 
> On Mon, Nov 02, 2020 at 07:01:44PM +0000, Honnappa Nagarahalli wrote:
> > <snip>
> >
> > > >
> > > Part of the confusion arises from the fact that originally that was
> > > the only parameter set by this - and on x86 it still is. Perhaps
> > > this parameter needs to
> > Just wondering, for x86, what does it mean if we set the max_num_cores and
> max_numa_nodes based on dynamic detection for 'native' build?
> > ISA still remains the same as before. But, the build might not work on
> machines with higher number of cores and numa nodes.
> > At the same time, the build also might not work on a machine with a different
> ISA. The users need to be aware that the target machine has the same ISA and
> same number of cores/numa nodes as the target machine.
> >
> Yes, that is a fair summary.
> 

There's also additional confusion about what the build should produce with no user input. You mentioned that builds from CI are used outside of CI, and that when a user compiles a binary without setting these [cpu and numa counts] explicitly, it should run on 95%+ of systems of that type (why not 100%?). These requirements/expectations suggest that generic settings should be the default: machine=generic, so that CI builds are usable on machines with a different ISA, and then generic defaults for the other settings (we're talking mainly about cpu and numa counts, but it should apply to everything - there are more settings that need generic defaults for Arm).
If we change the default to generic, then setting machine=native would only set the machine args and not change the behavior of other vars (i.e. use defaults for the build machine).
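
As a rough illustration of that split (machine names, flags and values here are placeholders, not the current config/meson.build logic):

    if machine == 'generic'
        # fixed baseline that should run on any supported machine of the arch
        machine_args += ['-march=corei7']   # x86 example
        max_lcores = 128
        max_numa_nodes = 4
    elif machine == 'native'
        # only the ISA follows the build machine; the other defaults stay generic
        machine_args += ['-march=native']
    endif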

> > > be renamed to "isa-level" or "architecture-flag" or similar to
> > > reflect its meaning. This would then allow a new "machine" setting,
> > > which can be considered separately. The question then is how much
> > > that helps with the main issue under discussion, that of cores and numa node
> values.
> > If we rename it, we will have backward compatibility issue (i.e. 'native' build on
> x86 will have different meaning and whoever wants the original meaning, have
> to change to using this new name). Not sure about the complexity in meson
> scripts.
> >
> 
> Yep, it was just a thought to see if it could help in this situation.
> 

I also don't think renaming or repurposing the machine meson option is the way to go, not only because of backwards compatibility but also because the option sets RTE_MACHINE and machine args, which lines up with the name nicely. As a side note, I grepped RTE_MACHINE and I don't see it being used anywhere - do we need that config?
But this doesn't work for Arm, since setting just the machine args doesn't make sense - setting that also necessitates setting some other DPDK config as well.

What could help is a new meson option. I'm working on adding an option where the user could specify a set of target configuration, or SoC configuration. I could add the configuration for the generic build as one of the available sets and then we'd be able to use that variable instead of machine for Arm builds.
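
For what it's worth, a rough sketch of what such an option could look like - the option name, SoC names and values are purely illustrative, not an existing DPDK option at the time of this thread:

    # meson_options.txt (illustrative):
    #     option('platform', type: 'string', value: 'generic',
    #         description: 'use configuration for a named platform/SoC')

    # config/arm/meson.build could then map the name to a bundle of settings:
    soc_generic = {'machine_args': ['-march=armv8-a'],
                   'max_lcores': 256, 'max_numa_nodes': 4}
    soc_n1sdp = {'machine_args': ['-mcpu=neoverse-n1'],
                 'max_lcores': 4, 'max_numa_nodes': 1}

    if get_option('platform') == 'n1sdp'
        soc_config = soc_n1sdp
    else
        soc_config = soc_generic
    endif
    dpdk_conf.set('RTE_MAX_LCORE', soc_config['max_lcores'])
    dpdk_conf.set('RTE_MAX_NUMA_NODES', soc_config['max_numa_nodes'])
    machine_args += soc_config['machine_args']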

> >
> > >
> > > > But, I think other DPDK specific parameters should also be considered.
> > > > For ex: RTE_MAX_LCORE should have a particular value for 'generic'
> > > > build in
> > > all the supported architectures. The value could be different for
> > > each architecture, but it is fixed for the 'generic' build for a given
> architecture.
> > > Otherwise, the 'generic' build might not run on all the machines of
> > > that architecture.
> > > >
> > > > Similarly, for 'native' build, is there any reason not to include
> > > > other DPDK
> > > parameters as part of the definition? IMO, 'native' should refer to
> > > the entire build machine, not just the ISA. i.e. build on the target machine.
> > > >
> > >
> > > While I understand the idea here, it is somewhat complicated by the
> > > fact that the meson options given in "meson_options.txt" cannot be
> > > set by meson code, which means that when we change the machine flag
> > > to "native" we can only use or ignore the user-provided lcores and
> > > numa nodes setting - we have no way to change them and reflect those
> > > changes back to the user. :-( This leads to the situation in the
> > > discussion in this thread, where we start needing all sorts of magic
> > > values to indicate use of machine-type defaults or detected defaults.
> > I am wondering why we need to take the max_num_cores and
> max_numa_nodes from the user? This option was not provided in the make build
> system. I ask this question because for 'generic' this has to be a static/known
> configuration. For cross builds, this info can come (or derived) from the cross
> build file.
> > Was it supposed to be used in conjunction with 'native' build?
> >
> 
> Well, it was configurable in the build config files same as all other DPDK build
> settings with make. When working first on meson, I felt it was a setting the user
> might be likely to want to tune, which is why I put it into the meson_options.txt
> and nobody suggested otherwise on review [which is the reason why many of
> the current options are the way they are :-)].
> 
> From my side, I have a couple of unknowns:
> 1. How big a difference in terms of memory use etc. of DPDK does it make by
>    having really big values for these core/numa counts? If there is not much
>    difference, then there is indeed little value in having them configurable
>    at all, and we should just use big defaults and be done with it.
> 2. If there is a noticable difference in these settings, how many users are
>    going to want to actually go to the trouble of tweaking these?
> 3. How big an effort is it to switch to having these settings made entirely
>    dynamic at runtime? Doing so would naturally make the need for these
>    settings completely go away.
> 
> With all that said, I'd be ok with a number of solutions. I'm ok to have these
> dropped as meson options and just have them specified in other ways, e.g.
> cross-file, or from meson.build files. [For x86, I'd tend towards having them
> defined in rte_config.h inside x86-specific ifdefs].
> Alternatively, I'm also happy enough with the proposed scheme here of allowing
> user override, with platform defaults using "0"-value and detection using "-1".
> 
> Regards,
> /Bruce
  
Honnappa Nagarahalli Nov. 6, 2020, 11:40 p.m. UTC | #12
<snip>

> >
> > > >
> > > Part of the confusion arises from the fact that originally that was
> > > the only parameter set by this - and on x86 it still is. Perhaps
> > > this parameter needs to
> > Just wondering, for x86, what does it mean if we set the max_num_cores
> and max_numa_nodes based on dynamic detection for 'native' build?
> > ISA still remains the same as before. But, the build might not work on
> machines with higher number of cores and numa nodes.
> > At the same time, the build also might not work on a machine with a
> different ISA. The users need to be aware that the target machine has the
> same ISA and same number of cores/numa nodes as the build machine.
> >
> Yes, that is a fair summary.
> 
> > > be renamed to "isa-level" or "architecture-flag" or similar to
> > > reflect its meaning. This would then allow a new "machine" setting,
> > > which can be considered separately. The question then is how much
> > > that helps with the main issue under discussion, that of cores and numa
> node values.
> > If we rename it, we will have backward compatibility issue (i.e. 'native' build
> on x86 will have different meaning and whoever wants the original meaning,
> have to change to using this new name). Not sure about the complexity in
> meson scripts.
> >
> 
> Yep, it was just a thought to see if it could help in this situation.
> 
> >
> > >
> > > > But, I think other DPDK specific parameters should also be considered.
> > > > For ex: RTE_MAX_LCORE should have a particular value for 'generic'
> > > > build in
> > > all the supported architectures. The value could be different for
> > > each architecture, but it is fixed for the 'generic' build for a given
> architecture.
> > > Otherwise, the 'generic' build might not run on all the machines of
> > > that architecture.
> > > >
> > > > Similarly, for 'native' build, is there any reason not to include
> > > > other DPDK
> > > parameters as part of the definition? IMO, 'native' should refer to
> > > the entire build machine, not just the ISA. i.e. build on the target machine.
> > > >
> > >
> > > While I understand the idea here, it is somewhat complicated by the
> > > fact that the meson options given in "meson_options.txt" cannot be
> > > set by meson code, which means that when we change the machine flag
> > > to "native" we can only use or ignore the user-provided lcores and
> > > numa nodes setting - we have no way to change them and reflect those
> > > changes back to the user. :-( This leads to the situation in the
> > > discussion in this thread, where we start needing all sorts of magic
> > > values to indicate use of machine-type defaults or detected defaults.
> > I am wondering why we need to take the max_num_cores and
> max_numa_nodes from the user? This option was not provided in the make
> build system. I ask this question because for 'generic' this has to be a
> static/known configuration. For cross builds, this info can come (or derived)
> from the cross build file.
> > Was it supposed to be used in conjunction with 'native' build?
> >
> 
> Well, it was configurable in the build config files same as all other DPDK build
> settings with make. When working first on meson, I felt it was a setting the
> user might be likely to want to tune, which is why I put it into the
> meson_options.txt and nobody suggested otherwise on review [which is the
> reason why many of the current options are the way they are :-)].
> 
> From my side, I have a couple of unknowns:
> 1. How big a difference in terms of memory use etc. of DPDK does it make by
>    having really big values for these core/numa counts? If there is not much
>    difference, then there is indeed little value in having them configurable
>    at all, and we should just use big defaults and be done with it.
> 2. If there is a noticable difference in these settings, how many users are
>    going to want to actually go to the trouble of tweaking these?
> 3. How big an effort is it to switch to having these settings made entirely
>    dynamic at runtime? Doing so would naturally make the need for these
>    settings completely go away.
A lot of the usage seems to be in the test code. Are these constants exposed to the users?

> 
> With all that said, I'd be ok with a number of solutions. I'm ok to have these
> dropped as meson options and just have them specified in other ways, e.g.
> cross-file, or from meson.build files. [For x86, I'd tend towards having them
> defined in rte_config.h inside x86-specific ifdefs].
> Alternatively, I'm also happy enough with the proposed scheme here of
> allowing user override, with platform defaults using "0"-value and detection
> using "-1".
Ok, we will stick to the methods in this patch.

> 
> Regards,
> /Bruce
  

Patch

diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
new file mode 100644
index 000000000..386f85f8b
--- /dev/null
+++ b/buildtools/get_cpu_count.py
@@ -0,0 +1,7 @@ 
+#!/usr/bin/python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 PANTHEON.tech s.r.o.
+
+import os
+
+print(os.cpu_count())
diff --git a/buildtools/get_numa_count.py b/buildtools/get_numa_count.py
new file mode 100644
index 000000000..f0c49973a
--- /dev/null
+++ b/buildtools/get_numa_count.py
@@ -0,0 +1,22 @@ 
+#!/usr/bin/python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 PANTHEON.tech s.r.o.
+
+import ctypes
+import glob
+import os
+import subprocess
+
+if os.name == 'posix':
+    if os.path.isdir('/sys/devices/system/node'):
+        print(len(glob.glob('/sys/devices/system/node/node*')))
+    else:
+        print(subprocess.run(['sysctl', 'vm.ndomains'], capture_output=True).stdout)
+
+elif os.name == 'nt':
+    libkernel32 = ctypes.windll.kernel32
+
+    count = ctypes.c_ulong()
+
+    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
+    print(count.value + 1)
diff --git a/buildtools/meson.build b/buildtools/meson.build
index 04808dabc..925e733b1 100644
--- a/buildtools/meson.build
+++ b/buildtools/meson.build
@@ -17,3 +17,5 @@  else
 endif
 map_to_win_cmd = py3 + files('map_to_win.py')
 sphinx_wrapper = py3 + files('call-sphinx-build.py')
+get_cpu_count_cmd = py3 + files('get_cpu_count.py')
+get_numa_count_cmd = py3 + files('get_numa_count.py')
diff --git a/config/meson.build b/config/meson.build
index a57c8ae9e..c4477f977 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -74,7 +74,11 @@  endif
 # still being able to support the CPU features required for DPDK.
 # This can be bumped up by the DPDK project, but it can never be an
 # invariant like 'native'
+max_lcores = get_option('max_lcores')
+max_numa_nodes = get_option('max_numa_nodes')
 if machine == 'default'
+	max_numa_nodes = 4
+	max_lcores = 128
 	if host_machine.cpu_family().startswith('x86')
 		# matches the old pre-meson build systems default
 		machine = 'corei7'
@@ -83,6 +87,22 @@  if machine == 'default'
 	elif host_machine.cpu_family().startswith('ppc')
 		machine = 'power8'
 	endif
+elif not meson.is_cross_build()
+	# find host core count and numa node count for native builds
+	if max_lcores == 0
+		max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
+		min_lcores = 2
+		if max_lcores < min_lcores
+			message('Found less than @0@ cores, building for @0@ cores'.format(min_lcores))
+			max_lcores = min_lcores
+		else
+			message('Found @0@ cores'.format(max_lcores))
+		endif
+	endif
+	if max_numa_nodes == 0
+		max_numa_nodes = run_command(get_numa_count_cmd).stdout().to_int()
+		message('Found @0@ numa nodes'.format(max_numa_nodes))
+	endif
 endif
 
 dpdk_conf.set('RTE_MACHINE', machine)
@@ -227,8 +247,10 @@  foreach arg: warning_flags
 endforeach
 
 # set other values pulled from the build options
-dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
-dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
+if not meson.is_cross_build()
+	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
+	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
+endif
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
@@ -247,6 +269,22 @@  compile_time_cpuflags = []
 subdir(arch_subdir)
 dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
 
+# check that cpu and numa count is set in cross builds
+if meson.is_cross_build()
+    	if max_lcores > 0
+		# specified on the cmdline
+		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
+	elif not dpdk_conf.has('RTE_MAX_LCORE')
+		error('Number of cores for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
+	endif
+	if max_numa_nodes > 0
+		# specified on the cmdline
+		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
+	elif not dpdk_conf.has('RTE_MAX_NUMA_NODES')
+		error('Number of numa nodes for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
+	endif
+endif
+
 # set the install path for the drivers
 dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
 
diff --git a/meson_options.txt b/meson_options.txt
index 9bf18ab6b..01b0c45c3 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -26,10 +26,10 @@  option('machine', type: 'string', value: 'native',
 	description: 'set the target machine type')
 option('max_ethports', type: 'integer', value: 32,
 	description: 'maximum number of Ethernet devices')
-option('max_lcores', type: 'integer', value: 128,
-	description: 'maximum number of cores/threads supported by EAL')
-option('max_numa_nodes', type: 'integer', value: 4,
-	description: 'maximum number of NUMA nodes supported by EAL')
+option('max_lcores', type: 'integer', value: 0,
+	description: 'maximum number of cores/threads supported by EAL. Value 0 means the number of cpus on the host will be used. For cross build, set to non-zero to overwrite the cross-file value.')
+option('max_numa_nodes', type: 'integer', value: 0,
+	description: 'maximum number of NUMA nodes supported by EAL. Value 0 means the number of numa nodes on the host will be used. For cross build, set to non-zero to overwrite the cross-file value.')
 option('enable_trace_fp', type: 'boolean', value: false,
 	description: 'enable fast path trace points.')
 option('tests', type: 'boolean', value: true,