[v2] test/mcslock: move performance test to perf tests
Commit Message
Move the MCS lock performance test into perf tests.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
v2:
1. Rebase code.
2. Split from the original patchset.
MAINTAINERS | 1 +
app/test/Makefile | 1 +
app/test/autotest_data.py | 6 +++
app/test/meson.build | 4 +-
app/test/test_mcslock.c | 88 -------------------------------
app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
6 files changed, 132 insertions(+), 89 deletions(-)
create mode 100644 app/test/test_mcslock_perf.c
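
For context, the case being relocated times a fixed number of lock/unlock iterations on each lcore and reports the cost in microseconds. A minimal sketch of that measurement pattern, using the generic MCS lock API exactly as the diff quoted below does (the measure_lock_cost_us() helper is illustrative only and not part of the patch; it assumes EAL has already been initialised):

	#include <stdint.h>
	#include <rte_cycles.h>
	#include <rte_mcslock.h>

	#define MAX_LOOP 1000000

	static rte_mcslock_t *p_ml_perf;	/* shared lock: pointer to the queue tail */

	/* Illustrative helper: time MAX_LOOP lock/unlock pairs on the calling lcore. */
	static uint64_t
	measure_lock_cost_us(void)
	{
		rte_mcslock_t me;	/* caller-provided queue node */
		uint64_t begin, cycles;
		uint64_t i;

		begin = rte_get_timer_cycles();
		for (i = 0; i < MAX_LOOP; i++) {
			rte_mcslock_lock(&p_ml_perf, &me);	/* enqueue, spin on own node */
			rte_mcslock_unlock(&p_ml_perf, &me);	/* hand off to the next waiter */
		}
		cycles = rte_get_timer_cycles() - begin;

		/* Convert timer cycles to microseconds via the timer frequency. */
		return cycles * 1000000 / rte_get_timer_hz();
	}

In test_mcslock_perf.c the same loop keeps a per-lcore _ml_perf_me node and waits on an rte_atomic32_t synchro flag so that all worker lcores start measuring together.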
Comments
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Phil Yang
> Sent: Tuesday, July 28, 2020 5:24 AM
> To: david.marchand@redhat.com; dev@dpdk.org
> Cc: aconole@redhat.com; Honnappa.Nagarahalli@arm.com;
> Ruifeng.Wang@arm.com; nd@arm.com; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: [dpdk-dev] [PATCH v2] test/mcslock: move performance test to perf
> tests
>
> Move the MCS lock performance test into perf tests.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
> v2:
> 1. Rebase code.
> 2. Split from the original patchset.
>
> MAINTAINERS | 1 +
> app/test/Makefile | 1 +
> app/test/autotest_data.py | 6 +++
> app/test/meson.build | 4 +-
> app/test/test_mcslock.c | 88 -------------------------------
> app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 132 insertions(+), 89 deletions(-)
> create mode 100644 app/test/test_mcslock_perf.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 171c4c7..eae6034 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -265,6 +265,7 @@ MCSlock - EXPERIMENTAL
> M: Phil Yang <phil.yang@arm.com>
> F: lib/librte_eal/include/generic/rte_mcslock.h
> F: app/test/test_mcslock.c
> +F: app/test/test_mcslock_perf.c
>
> Ticketlock
> M: Joyce Kong <joyce.kong@arm.com>
> diff --git a/app/test/Makefile b/app/test/Makefile
> index f406527..e4b3327 100644
> --- a/app/test/Makefile
> +++ b/app/test/Makefile
> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
> SRCS-y += test_malloc.c
> SRCS-y += test_cycles.c
> SRCS-y += test_mcslock.c
> +SRCS-y += test_mcslock_perf.c
> SRCS-y += test_spinlock.c
> SRCS-y += test_ticketlock.c
> SRCS-y += test_memory.c
> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
> index 4b7da45..c813b93 100644
> --- a/app/test/autotest_data.py
> +++ b/app/test/autotest_data.py
> @@ -796,6 +796,12 @@
> "Func": default_autotest,
> "Report": None,
> },
> + {
> + "Name": "MCS Lock performance autotest",
> + "Command": "mcslock_perf_autotest",
> + "Func": default_autotest,
> + "Report": None,
> + },
> #
> # Please always make sure that ring_perf is the last test!
> #
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 786a213..59df2fd 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -86,6 +86,7 @@ test_sources = files('commands.c',
> 'test_meter.c',
> 'test_metrics.c',
> 'test_mcslock.c',
> + 'test_mcslock_perf.c',
> 'test_mp_secondary.c',
> 'test_per_lcore.c',
> 'test_pmd_perf.c',
> @@ -297,7 +298,8 @@ perf_test_names = [
> 'hash_readwrite_perf_autotest',
> 'hash_readwrite_lf_perf_autotest',
> 'trace_perf_autotest',
> - 'ipsec_perf_autotest',
> + 'ipsec_perf_autotest',
> + 'mcslock_perf_autotest',
> ]
>
> driver_test_names = [
> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
> index ddccaaf..a1dc610 100644
> --- a/app/test/test_mcslock.c
> +++ b/app/test/test_mcslock.c
> @@ -32,23 +32,16 @@
> *
> * - The function takes the global lock, display something, then releases
> * the global lock on each core.
> - *
> - * - A load test is carried out, with all cores attempting to lock a single
> - * lock multiple times.
> */
>
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
> -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
>
> rte_mcslock_t *p_ml;
> rte_mcslock_t *p_ml_try;
> -rte_mcslock_t *p_ml_perf;
>
> static unsigned int count;
>
> -static rte_atomic32_t synchro;
> -
> static int
> test_mcslock_per_core(__rte_unused void *arg)
> {
> @@ -63,85 +56,8 @@ test_mcslock_per_core(__rte_unused void *arg)
> return 0;
> }
>
> -static uint64_t time_count[RTE_MAX_LCORE] = {0};
> -
> #define MAX_LOOP 1000000
>
> -static int
> -load_loop_fn(void *func_param)
> -{
> - uint64_t time_diff = 0, begin;
> - uint64_t hz = rte_get_timer_hz();
> - volatile uint64_t lcount = 0;
> - const int use_lock = *(int *)func_param;
> - const unsigned int lcore = rte_lcore_id();
> -
> - /**< Per core me node. */
> - rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> -
> - /* wait synchro */
> - while (rte_atomic32_read(&synchro) == 0)
> - ;
> -
> - begin = rte_get_timer_cycles();
> - while (lcount < MAX_LOOP) {
> - if (use_lock)
> - rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> -
> - lcount++;
> - if (use_lock)
> - rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> - }
> - time_diff = rte_get_timer_cycles() - begin;
> - time_count[lcore] = time_diff * 1000000 / hz;
> - return 0;
> -}
> -
> -static int
> -test_mcslock_perf(void)
> -{
> - unsigned int i;
> - uint64_t total = 0;
> - int lock = 0;
> - const unsigned int lcore = rte_lcore_id();
> -
> - printf("\nTest with no lock on single core...\n");
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - lcore, time_count[lcore]);
> - memset(time_count, 0, sizeof(time_count));
> -
> - printf("\nTest with lock on single core...\n");
> - lock = 1;
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - lcore, time_count[lcore]);
> - memset(time_count, 0, sizeof(time_count));
> -
> - printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> -
> - rte_atomic32_set(&synchro, 0);
> - rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> -
> - /* start synchro and launch test on master */
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> -
> - rte_eal_mp_wait_lcore();
> -
> - RTE_LCORE_FOREACH(i) {
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - i, time_count[i]);
> - total += time_count[i];
> - }
> -
> - printf("Total Cost Time = %"PRIu64" us\n", total);
> -
> - return 0;
> -}
> -
> /*
> * Use rte_mcslock_trylock() to trylock a mcs lock object,
> * If it could not lock the object successfully, it would
> @@ -240,10 +156,6 @@ test_mcslock(void)
> ret = -1;
> rte_mcslock_unlock(&p_ml, &ml_me);
>
> - /* mcs lock perf test */
> - if (test_mcslock_perf() < 0)
> - return -1;
> -
> return ret;
> }
>
> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
> new file mode 100644
> index 0000000..6948344
> --- /dev/null
> +++ b/app/test/test_mcslock_perf.c
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_memory.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_eal.h>
> +#include <rte_lcore.h>
> +#include <rte_cycles.h>
> +#include <rte_mcslock.h>
> +#include <rte_atomic.h>
> +
> +#include "test.h"
> +
> +/*
> + * RTE MCS lock perf test
> + * ======================
> + *
> + * These tests are derived from spin lock perf test cases.
> + *
> + * - A load test is carried out, with all cores attempting to lock a single
> + * lock multiple times.
> + */
> +
> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
> +rte_mcslock_t *p_ml_perf;
> +
> +static rte_atomic32_t synchro;
> +static uint64_t time_count[RTE_MAX_LCORE] = {0};
> +
> +#define MAX_LOOP 1000000
> +
> +static int
> +load_loop_fn(void *func_param)
> +{
> + uint64_t time_diff = 0, begin;
> + uint64_t hz = rte_get_timer_hz();
> + volatile uint64_t lcount = 0;
> + const int use_lock = *(int *)func_param;
> + const unsigned int lcore = rte_lcore_id();
> +
> + /**< Per core me node. */
> + rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> +
> + /* wait synchro */
> + while (rte_atomic32_read(&synchro) == 0)
> + ;
> +
> + begin = rte_get_timer_cycles();
> + while (lcount < MAX_LOOP) {
> + if (use_lock)
> + rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> +
> + lcount++;
> + if (use_lock)
> + rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> + }
> + time_diff = rte_get_timer_cycles() - begin;
> + time_count[lcore] = time_diff * 1000000 / hz;
> + return 0;
> +}
> +
> +/*
> + * Test rte_eal_get_lcore_state() in addition to mcs locks
> + * as we have "waiting" then "running" lcores.
> + */
> +static int
> +test_mcslock_perf(void)
> +{
> + unsigned int i;
> + uint64_t total = 0;
> + int lock = 0;
> + const unsigned int lcore = rte_lcore_id();
> +
> + printf("\nTest with no lock on single core...\n");
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + lcore, time_count[lcore]);
> + memset(time_count, 0, sizeof(time_count));
> +
> + printf("\nTest with lock on single core...\n");
> + lock = 1;
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + lcore, time_count[lcore]);
> + memset(time_count, 0, sizeof(time_count));
> +
> + printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> +
> + rte_atomic32_set(&synchro, 0);
> + rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> +
> + /* start synchro and launch test on master */
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> +
> + rte_eal_mp_wait_lcore();
> +
> + RTE_LCORE_FOREACH(i) {
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + i, time_count[i]);
> + total += time_count[i];
> + }
> +
> + printf("Total Cost Time = %"PRIu64" us\n", total);
> +
> + return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
> --
> 2.7.4
>
Reviewed-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
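
One design point worth noting in the moved code, for readers skimming the patch: rte_eal_mp_remote_launch() starts the worker lcores one after another, so the synchro flag serves as a start gate; every lcore spins on it, and the master lcore flips it only once all workers have been launched, which makes the contended measurement begin on all cores at roughly the same time. Condensed from the test_mcslock_perf.c hunk above, with error handling omitted:

	int lock = 1;

	rte_atomic32_set(&synchro, 0);		/* gate closed */
	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
	rte_atomic32_set(&synchro, 1);		/* release all lcores */
	load_loop_fn(&lock);			/* the master measures too */
	rte_eal_mp_wait_lcore();

Each lcore writes only its own slot in time_count[], so no further synchronisation is needed when the per-core results are printed and summed afterwards.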