[1/5] app/test-mp: add multiprocess test
Checks
Commit Message
This commit adds a test scenario that initiates multiple processes
concurrently. These processes attach to the same shared heap, with an
automatic detection mechanism to identify the primary process.
Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
app/meson.build | 1 +
app/test-mp/main.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
app/test-mp/meson.build | 8 ++++++++
app/test-mp/run.sh | 39 +++++++++++++++++++++++++++++++++++++++
4 files changed, 97 insertions(+)
create mode 100644 app/test-mp/main.c
create mode 100644 app/test-mp/meson.build
create mode 100755 app/test-mp/run.sh
Comments
On Tue, 12 Dec 2023 06:25:12 +0200
Artemy Kovalyov <artemyko@nvidia.com> wrote:
> +rte_atomic32_t g_count;
> +
> +static int
> +done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
> +{
> + rte_atomic32_dec(&g_count);
> + return 0;
> +}
g_count is only used within this file, so it should be declared static.
Also, assert() may not be the ideal way to report test failures.
The preferred way would be to use RTE_TEST_ASSERT() and RTE_TEST_ASSERT_EQUAL().
In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.
Artemy Kovalyov (5):
app/test-mp: add multiprocess test
eal: fix multiprocess hotplug race
ipc: fix mp channel closure to prevent message loss
eal: fix first time primary autodetect
eal: fix memzone fbarray cleanup
app/meson.build | 1 +
app/test-mp/main.c | 52 +++++++++++++++++++++++++++++++++++++
app/test-mp/meson.build | 8 ++++++
app/test-mp/run.sh | 40 ++++++++++++++++++++++++++++
lib/eal/common/eal_common_memzone.c | 12 +++++++++
lib/eal/common/eal_common_proc.c | 4 +--
lib/eal/common/eal_private.h | 5 ++++
lib/eal/common/hotplug_mp.c | 3 +++
lib/eal/linux/eal.c | 3 ++-
9 files changed, 125 insertions(+), 3 deletions(-)
create mode 100644 app/test-mp/main.c
create mode 100644 app/test-mp/meson.build
create mode 100755 app/test-mp/run.sh
In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.
Artemy Kovalyov (5):
app/test-mp: add multiprocess test
eal: fix multiprocess hotplug race
ipc: fix mp channel closure to prevent message loss
eal: fix first time primary autodetect
eal: fix memzone fbarray cleanup
app/meson.build | 1 +
app/test-mp/main.c | 52 +++++++++++++++++++++++++++++++++++++
app/test-mp/meson.build | 8 ++++++
app/test-mp/run.sh | 40 ++++++++++++++++++++++++++++
lib/eal/common/eal_common_memzone.c | 12 +++++++++
lib/eal/common/eal_common_proc.c | 4 +--
lib/eal/common/eal_private.h | 5 ++++
lib/eal/common/hotplug_mp.c | 3 +++
lib/eal/linux/eal.c | 3 ++-
9 files changed, 125 insertions(+), 3 deletions(-)
create mode 100644 app/test-mp/main.c
create mode 100644 app/test-mp/meson.build
create mode 100755 app/test-mp/run.sh
@@ -30,6 +30,7 @@ apps = [
'test-flow-perf',
'test-gpudev',
'test-mldev',
+ 'test-mp',
'test-pipeline',
'test-pmd',
'test-regex',
new file mode 100644
@@ -0,0 +1,49 @@
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+
+/*
+ * Countdown of "done" messages the primary process still expects.
+ * main() initialises it from the command line; done() decrements it.
+ * Only used in this file, hence static.
+ */
+static rte_atomic32_t g_count;
+
+/*
+ * IPC action registered under the name "done" by the primary process.
+ * Each received message decrements g_count by one. Neither the message
+ * nor the second callback argument is inspected.
+ *
+ * Always returns 0.
+ */
+static int
+done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
+{
+	rte_atomic32_dec(&g_count);
+	return 0;
+}
+
+/* Tunables of the test; the values are the ones the test has always used. */
+enum {
+	TEST_ALLOC_SIZE = 0x1000000,	/* bytes requested from rte_malloc() */
+	TEST_ALLOC_ALIGN = 0x1000,	/* alignment requested from rte_malloc() */
+	TEST_WAIT_SECONDS = 5,		/* how long the primary waits for "done" */
+};
+
+/*
+ * Report a fatal test failure and abort.
+ *
+ * abort() is used on purpose, and unlike assert() it is not compiled out
+ * when NDEBUG is defined: run.sh looks for core.* files to detect a
+ * failed iteration.
+ */
+static void
+fail(const char *what)
+{
+	fprintf(stderr, "test-mp: %s\n", what);
+	abort();
+}
+
+/*
+ * Parse the decimal, non-negative message count given on the command line.
+ * Aborts the test on malformed or out-of-range input.
+ */
+static int32_t
+parse_count(const char *str)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(str, &end, 10);
+	if (end == str || *end != '\0' || errno == ERANGE ||
+	    val < 0 || val > INT32_MAX)
+		fail("invalid process count argument");
+
+	return (int32_t)val;
+}
+
+/*
+ * Multiprocess start-up test.
+ *
+ * Usage: <binary> <EAL options> <count>
+ *
+ * Every process initialises EAL and performs one rte_malloc(). The
+ * primary process then waits up to TEST_WAIT_SECONDS for <count> "done"
+ * IPC messages; every other process sends one "done" message.
+ *
+ * Returns 0 on success; any fatal failure aborts the process.
+ */
+int
+main(int argc, char **argv)
+{
+	void *p;
+	int ret;
+	int is_primary;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		fail("rte_eal_init() failed");
+
+	/*
+	 * rte_eal_init() returns the number of arguments it parsed; the
+	 * count is the first application argument after them. Make sure it
+	 * exists before dereferencing it.
+	 */
+	if (ret + 1 >= argc)
+		fail("missing process count argument");
+	rte_atomic32_set(&g_count, parse_count(argv[ret + 1]));
+
+	is_primary = rte_eal_process_type() == RTE_PROC_PRIMARY;
+
+	if (is_primary) {
+		ret = rte_mp_action_register("done", done);
+		if (ret != 0)
+			fail("cannot register the \"done\" action");
+	}
+
+	/* The allocation itself is the workload; it is never used or freed. */
+	p = rte_malloc(NULL, TEST_ALLOC_SIZE, TEST_ALLOC_ALIGN);
+	if (p == NULL)
+		fail("rte_malloc() failed");
+
+	if (is_primary) {
+		const uint64_t deadline =
+			rte_rdtsc() + TEST_WAIT_SECONDS * rte_get_tsc_hz();
+
+		/* Busy-wait until every expected "done" message has arrived. */
+		while (rte_atomic32_read(&g_count) > 0) {
+			if (rte_rdtsc() >= deadline)
+				fail("timed out waiting for \"done\" messages");
+		}
+	} else {
+		struct rte_mp_msg msg = { .name = "done" };
+
+		/*
+		 * Not fatal here: if the message is lost the primary times
+		 * out and fails the test; the diagnostic tells which side
+		 * went wrong.
+		 */
+		if (rte_mp_sendmsg(&msg) < 0)
+			fprintf(stderr, "test-mp: failed to send \"done\"\n");
+	}
+
+	rte_eal_cleanup();
+	return 0;
+}
new file mode 100644
@@ -0,0 +1,8 @@
+# Build definition for the test-mp application (single source file,
+# depends on EAL only). Skipped entirely on Windows.
+if is_windows
+ build = false
+ reason = 'not supported on Windows'
+ subdir_done()
+endif
+
+sources = files('main.c')
+deps = ['eal']
new file mode 100755
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+logdir=/tmp/dpdk_test_mp
+repeat=1
+lastcore=$(($(nproc) - 1))
+log=1
+
+while getopts p:r:lL:d op; do case $op in
+ p) lastcore=$OPTARG ;;
+ r) repeat=$OPTARG ;;
+ L) logdir=$OPTARG ;;
+ l) log=0 ;;
+ d) debug=1 ;;
+esac done
+shift $((OPTIND-1))
+
+test=$1
+logpath=$logdir/$(date +%y%m%d-%H%M%S)
+
+rm -f core.*
+pkill dpdk-test-mp
+
+for j in $(seq $repeat) ; do
+ [ $log ] && mkdir -p $logpath/$j
+ for i in $(seq 0 $lastcore) ; do
+ args="-l $i --file-prefix=dpdk1 --proc-type=auto"
+ if [ $debug ] ; then
+ args="$args --log-level=lib.eal:8"
+ fi
+ if [ $log ] ; then
+ $test $args $lastcore >$logpath/$j/$i.log 2>&1 &
+ else
+ $test $args $lastcore &
+ fi
+ done
+ wait || break
+ [ $(ls core.* 2>/dev/null | wc -l) -gt 0 ] && break
+ echo iteration $j passed
+done