From patchwork Mon Jan 22 14:34:45 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Morten_Br=C3=B8rup?= X-Patchwork-Id: 136035 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id BCF864399A; Mon, 22 Jan 2024 15:34:51 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 54751402DA; Mon, 22 Jan 2024 15:34:51 +0100 (CET) Received: from dkmailrelay1.smartsharesystems.com (smartserver.smartsharesystems.com [77.243.40.215]) by mails.dpdk.org (Postfix) with ESMTP id B74E9402C3 for ; Mon, 22 Jan 2024 15:34:49 +0100 (CET) Received: from smartserver.smartsharesystems.com (smartserver.smartsharesys.local [192.168.4.10]) by dkmailrelay1.smartsharesystems.com (Postfix) with ESMTP id 8E9642049C; Mon, 22 Jan 2024 15:34:49 +0100 (CET) Received: from dkrd2.smartsharesys.local ([192.168.4.12]) by smartserver.smartsharesystems.com with Microsoft SMTPSVC(6.0.3790.4675); Mon, 22 Jan 2024 15:34:48 +0100 From: =?utf-8?q?Morten_Br=C3=B8rup?= To: andrew.rybchenko@oktetlabs.ru, fengchengwen@huawei.com Cc: dev@dpdk.org, =?utf-8?q?Morten_Br=C3=B8rup?= Subject: [PATCH v2] mempool: test performance with larger bursts Date: Mon, 22 Jan 2024 15:34:45 +0100 Message-Id: <20240122143445.45276-1-mb@smartsharesystems.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20240121045249.22465-1-mb@smartsharesystems.com> References: <20240121045249.22465-1-mb@smartsharesystems.com> MIME-Version: 1.0 X-OriginalArrivalTime: 22 Jan 2024 14:34:48.0343 (UTC) FILETIME=[26B40A70:01DA4D40] X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
dev-bounces@dpdk.org Bursts of up to 64 or 128 packets are not uncommon, so increase the maximum tested get and put burst sizes from 32 to 128. Some applications keep more than 512 objects, so increase the maximum number of kept objects from 512 to 8192, still stepping by a factor of four. This exceeds the typical mempool cache size of 512 objects, so the test also exercises the mempool driver. Signed-off-by: Morten Brørup Acked-by: Chengwen Feng --- v2: Addressed feedback by Chengwen Feng * Added get and put burst sizes of 64 packets, which are probably also not uncommon. * Fixed the list of numbers of kept objects so that it still steps by a factor of four. * Added three derivative test cases (1 core, 2 cores, all cores), for faster testing. --- app/test/test_mempool_perf.c | 107 ++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c index 96de347f04..a5a7d43608 100644 --- a/app/test/test_mempool_perf.c +++ b/app/test/test_mempool_perf.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2014 Intel Corporation - * Copyright(c) 2022 SmartShare Systems + * Copyright(c) 2022-2024 SmartShare Systems */ #include @@ -54,22 +54,24 @@ * * - Bulk size (*n_get_bulk*, *n_put_bulk*) * - * - Bulk get from 1 to 32 - * - Bulk put from 1 to 32 - * - Bulk get and put from 1 to 32, compile time constant + * - Bulk get from 1 to 128 + * - Bulk put from 1 to 128 + * - Bulk get and put from 1 to 128, compile time constant * * - Number of kept objects (*n_keep*) * * - 32 * - 128 * - 512 + * - 2048 + * - 8192 */ #define N 65536 #define TIME_S 5 #define MEMPOOL_ELT_SIZE 2048 -#define MAX_KEEP 512 -#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1) +#define MAX_KEEP 8192 +#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE*2))-1) /* Number of pointers fitting into one cache line. 
*/ #define CACHE_LINE_BURST (RTE_CACHE_LINE_SIZE / sizeof(uintptr_t)) @@ -204,6 +206,10 @@ per_lcore_mempool_test(void *arg) CACHE_LINE_BURST, CACHE_LINE_BURST); else if (n_get_bulk == 32) ret = test_loop(mp, cache, n_keep, 32, 32); + else if (n_get_bulk == 64) + ret = test_loop(mp, cache, n_keep, 64, 64); + else if (n_get_bulk == 128) + ret = test_loop(mp, cache, n_keep, 128, 128); else ret = -1; @@ -289,9 +295,9 @@ launch_cores(struct rte_mempool *mp, unsigned int cores) static int do_one_mempool_test(struct rte_mempool *mp, unsigned int cores) { - unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 0 }; - unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 0 }; - unsigned int keep_tab[] = { 32, 128, 512, 0 }; + unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 0 }; + unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 0 }; + unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 0 }; unsigned *get_bulk_ptr; unsigned *put_bulk_ptr; unsigned *keep_ptr; @@ -301,6 +307,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores) for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) { for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) { + if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr) + continue; + use_constant_values = 0; n_get_bulk = *get_bulk_ptr; n_put_bulk = *put_bulk_ptr; @@ -323,7 +332,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores) } static int -test_mempool_perf(void) +do_all_mempool_perf_tests(unsigned int cores) { struct rte_mempool *mp_cache = NULL; struct rte_mempool *mp_nocache = NULL; @@ -376,65 +385,73 @@ test_mempool_perf(void) rte_mempool_obj_iter(default_pool, my_obj_init, NULL); - /* performance test with 1, 2 and max cores */ printf("start performance test (without cache)\n"); - - if (do_one_mempool_test(mp_nocache, 1) < 0) + if (do_one_mempool_test(mp_nocache, cores) < 0) goto err; - if (do_one_mempool_test(mp_nocache, 2) < 0) - goto err; - - if 
(do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0) - goto err; - - /* performance test with 1, 2 and max cores */ printf("start performance test for %s (without cache)\n", default_pool_ops); - - if (do_one_mempool_test(default_pool, 1) < 0) + if (do_one_mempool_test(default_pool, cores) < 0) goto err; - if (do_one_mempool_test(default_pool, 2) < 0) + printf("start performance test (with cache)\n"); + if (do_one_mempool_test(mp_cache, cores) < 0) goto err; - if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0) + printf("start performance test (with user-owned cache)\n"); + use_external_cache = 1; + if (do_one_mempool_test(mp_nocache, cores) < 0) goto err; - /* performance test with 1, 2 and max cores */ - printf("start performance test (with cache)\n"); + rte_mempool_list_dump(stdout); - if (do_one_mempool_test(mp_cache, 1) < 0) - goto err; + ret = 0; - if (do_one_mempool_test(mp_cache, 2) < 0) - goto err; +err: + rte_mempool_free(mp_cache); + rte_mempool_free(mp_nocache); + rte_mempool_free(default_pool); + return ret; +} - if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0) - goto err; +static int +test_mempool_perf_1core(void) +{ + return do_all_mempool_perf_tests(1); +} - /* performance test with 1, 2 and max cores */ - printf("start performance test (with user-owned cache)\n"); - use_external_cache = 1; +static int +test_mempool_perf_2cores(void) +{ + return do_all_mempool_perf_tests(2); +} - if (do_one_mempool_test(mp_nocache, 1) < 0) - goto err; +static int +test_mempool_perf_allcores(void) +{ + return do_all_mempool_perf_tests(rte_lcore_count()); +} - if (do_one_mempool_test(mp_nocache, 2) < 0) - goto err; +static int +test_mempool_perf(void) +{ + int ret = -1; - if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0) + /* performance test with 1, 2 and max cores */ + if (do_all_mempool_perf_tests(1) < 0) + goto err; + if (do_all_mempool_perf_tests(2) < 0) + goto err; + if (do_all_mempool_perf_tests(rte_lcore_count()) < 0) goto err; 
- - rte_mempool_list_dump(stdout); ret = 0; err: - rte_mempool_free(mp_cache); - rte_mempool_free(mp_nocache); - rte_mempool_free(default_pool); return ret; } REGISTER_PERF_TEST(mempool_perf_autotest, test_mempool_perf); +REGISTER_PERF_TEST(mempool_perf_autotest_1core, test_mempool_perf_1core); +REGISTER_PERF_TEST(mempool_perf_autotest_2cores, test_mempool_perf_2cores); +REGISTER_PERF_TEST(mempool_perf_autotest_allcores, test_mempool_perf_allcores);