From patchwork Tue Jan 24 17:49:35 2023
X-Patchwork-Submitter: Kamalakshitha Aligeri
X-Patchwork-Id: 122494
X-Patchwork-Delegate: david.marchand@redhat.com
From: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
To: bruce.richardson@intel.com, mb@smartsharesystems.com,
 konstantin.ananyev@huawei.com, olivier.matz@6wind.com,
 andrew.rybchenko@oktetlabs.ru, Honnappa.Nagarahalli@arm.com
Cc: dev@dpdk.org, nd@arm.com, Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
Subject: [RFCv2 1/2] net/i40e: replace get and put functions
Date: Tue, 24 Jan 2023 17:49:35 +0000
Message-Id: <20230124174936.190800-1-kamalakshitha.aligeri@arm.com>

Integrate the mempool cache zero-copy get and put APIs in the i40e PMD.

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1-mb@smartsharesystems.com/

1. Added support for mempools without cache (Morten Brorup)

 .mailmap                                |  1 +
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 35 ++++++++-----------------
 drivers/net/i40e/i40e_rxtx_vec_common.h | 23 ++++++++++------
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 35 ++++++++++++++++---------
 4 files changed, 49 insertions(+), 45 deletions(-)
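As context for the diffs that follow: this series builds on the zero-copy
mempool cache API from the RFC linked above. Below is a minimal sketch of the
put-side calling convention as this series uses it; the helper name
zc_put_sketch and the fallback policy are illustrative assumptions based on
the RFC's NULL-on-overflow contract, not part of this patch.

#include <string.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

/* Editorial sketch, not part of the patch: free n objects through the
 * zero-copy cache API, falling back to the backend when the cache cannot
 * take the burst (per the linked RFC, zc_put_bulk returns NULL when the
 * request exceeds the cache flush threshold).
 */
static void
zc_put_sketch(struct rte_mempool *mp, void * const *objs, unsigned int n)
{
	struct rte_mempool_cache *cache =
		rte_mempool_default_cache(mp, rte_lcore_id());
	void **slots;

	/* Reserve n slots in the lcore cache and get a pointer to them. */
	slots = (cache != NULL) ?
		rte_mempool_cache_zc_put_bulk(cache, mp, n) : NULL;
	if (slots != NULL)
		memcpy(slots, objs, n * sizeof(void *));	/* single copy */
	else
		rte_mempool_ops_enqueue_bulk(mp, objs, n);
}

In the TX paths below this is what removes the intermediate free[] array:
the driver writes the mbuf pointers straight into the cache slots instead of
staging them and letting rte_mempool_put_bulk() copy them a second time.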
diff --git a/.mailmap b/.mailmap
index 75884b6fe2..05a42edbcf 100644
--- a/.mailmap
+++ b/.mailmap
@@ -670,6 +670,7 @@ Kai Ji
 Kaiwen Deng
 Kalesh AP
 Kamalakannan R
+Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
 Kamil Bednarczyk
 Kamil Chalupnik
 Kamil Rytarowski
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 60c97d5331..a4fba4ddc9 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -35,6 +35,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	if (unlikely(!cache))
 		return i40e_rxq_rearm_common(rxq, true);
 
+	void **cache_objs;
+
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
 
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
@@ -45,15 +48,12 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		/* No. Backfill the cache first, and then fill from it */
 		uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size -
 				cache->len);
 
-		/* How many do we require
-		 * i.e. number to fill the cache + the request
-		 */
 		int ret = rte_mempool_ops_dequeue_bulk(rxq->mp,
-				&cache->objs[cache->len], req);
-		if (ret == 0) {
-			cache->len += req;
-		} else {
+				cache_objs[cache->len], req);
+		if (ret != 0) {
 			if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
 				    rxq->nb_rx_desc) {
 				__m128i dma_addr0;
@@ -63,11 +63,11 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 					rxep[i].mbuf = &rxq->fake_mbuf;
 					_mm_store_si128
 						((__m128i *)&rxdp[i].read,
-								dma_addr0);
+							dma_addr0);
 				}
 			}
 			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-				RTE_I40E_RXQ_REARM_THRESH;
+					RTE_I40E_RXQ_REARM_THRESH;
 			return;
 		}
 	}
@@ -90,7 +90,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	 */
 	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) {
 		const __m512i mbuf_ptrs = _mm512_loadu_si512
-			(&cache->objs[cache->len - 8]);
+			(cache_objs[cache->len - 8]);
 		_mm512_store_si512(rxep, mbuf_ptrs);
 
 		/* gather iova of mbuf0-7 into one zmm reg */
@@ -906,21 +906,16 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 	struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
 			rte_lcore_id());
 
-	if (!cache || cache->len == 0)
+	if (!cache)
 		goto normal;
 
-	cache_objs = &cache->objs[cache->len];
+	cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
 
-	if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+	if (!cache_objs) {
 		rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
 		goto done;
 	}
 
-	/* The cache follows the following algorithm
-	 *   1. Add the objects to the cache
-	 *   2. Anything greater than the cache min value (if it
-	 *   crosses the cache flush threshold) is flushed to the ring.
-	 */
 	/* Add elements back into the cache */
 	uint32_t copied = 0;
 	/* n is multiple of 32 */
@@ -936,14 +931,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 		_mm512_storeu_si512(&cache_objs[copied + 24], d);
 		copied += 32;
 	}
-	cache->len += n;
-
-	if (cache->len >= cache->flushthresh) {
-		rte_mempool_ops_enqueue_bulk
-			(mp, &cache->objs[cache->size],
-			cache->len - cache->size);
-		cache->len = cache->size;
-	}
 
 	goto done;
 }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..70e11a2ef2 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -95,17 +95,24 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 
 	n = txq->tx_rs_thresh;
 
-	 /* first buffer to free from S/W ring is at index
-	  * tx_next_dd - (tx_rs_thresh-1)
-	  */
+	/* first buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1)
+	 */
 	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		for (i = 0; i < n; i++) {
-			free[i] = txep[i].mbuf;
-			/* no need to reset txep[i].mbuf in vector path */
+		struct rte_mempool *mp = txep[0].mbuf->pool;
+		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
+		void **cache_objs;
+
+		cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
+		if (cache_objs) {
+			for (i = 0; i < n; i++) {
+				cache_objs[i] = txep->mbuf;
+				/* no need to reset txep[i].mbuf in vector path */
+				txep++;
+			}
 		}
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
 		goto done;
 	}
@@ -121,7 +128,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 		} else {
 			rte_mempool_put_bulk(free[0]->pool,
 					     (void *)free,
-					     nb_free);
+					nb_free);
 			free[0] = m;
 			nb_free = 1;
 		}
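The RX side is the mirror image of the put path. As a reading aid for the
NEON diff below, here is the rearm flow reduced to its mempool interaction.
The function name and error convention are illustrative assumptions; note
that the sketch takes the zero-copy path only when a per-lcore cache exists.

#include <rte_lcore.h>
#include <rte_memcpy.h>
#include <rte_mempool.h>

/* Editorial sketch, not part of the patch: fill n software-ring slots
 * (pointer-sized entries, like the driver's rxep array) either straight
 * from the mempool backend when there is no cache, or via the zero-copy
 * cache API. Returns 0 on success, -1 on allocation failure.
 */
static int
rearm_sketch(struct rte_mempool *mp, void **sw_ring, unsigned int n)
{
	struct rte_mempool_cache *cache =
		rte_mempool_default_cache(mp, rte_lcore_id());

	if (cache == NULL)
		/* No per-lcore cache: dequeue directly from the backend. */
		return rte_mempool_ops_dequeue_bulk(mp, sw_ring, n) == 0 ? 0 : -1;

	/* Borrow the object pointers out of the cache (the cache is
	 * backfilled from the backend as needed), then copy them into the
	 * software ring in one shot.
	 */
	void **objs = rte_mempool_cache_zc_get_bulk(cache, mp, n);
	if (objs == NULL)
		return -1;
	rte_memcpy(sw_ring, objs, n * sizeof(void *));
	return 0;
}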
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..5ffc462a47 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -30,23 +30,32 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	uint64_t paddr;
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+	struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, rte_lcore_id());
+
+	/* When no cache is provided, get the objects directly from the backend */
+	if (!cache) {
+		int ret = rte_mempool_ops_dequeue_bulk(rxq->mp, (void *)rxep,
+				RTE_I40E_RXQ_REARM_THRESH);
+		/* Pull 'n' more MBUFs into the software ring */
+		if (ret != 0) {
+			if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+					rxq->nb_rx_desc) {
+				for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+					rxep[i].mbuf = &rxq->fake_mbuf;
+					vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+				}
 			}
+			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+					RTE_I40E_RXQ_REARM_THRESH;
+			return;
 		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
 	}
 
+	void **cache_objs;
+
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
+	rte_memcpy(rxep, cache_objs, RTE_I40E_RXQ_REARM_THRESH * sizeof(void *));
+
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0].mbuf;

From patchwork Tue Jan 24 17:49:36 2023
X-Patchwork-Submitter: Kamalakshitha Aligeri
X-Patchwork-Id: 122495
X-Patchwork-Delegate: david.marchand@redhat.com
From: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
To: bruce.richardson@intel.com, mb@smartsharesystems.com,
 konstantin.ananyev@huawei.com, olivier.matz@6wind.com,
 andrew.rybchenko@oktetlabs.ru, Honnappa.Nagarahalli@arm.com
Cc: dev@dpdk.org, nd@arm.com, Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
Subject: [RFCv2 2/2] test/mempool: add zero-copy APIs
Date: Tue, 24 Jan 2023 17:49:36 +0000
Message-Id: <20230124174936.190800-2-kamalakshitha.aligeri@arm.com>
In-Reply-To: <20230124174936.190800-1-kamalakshitha.aligeri@arm.com>
References: <20230124174936.190800-1-kamalakshitha.aligeri@arm.com>

Added mempool test cases with the zero-copy get and put APIs.

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1-mb@smartsharesystems.com/

1. Added mempool test cases with zero-copy get and put APIs

 app/test/test_mempool.c | 124 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 2 deletions(-)
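As a reading aid for the test diff below: with the zero-copy API the caller
moves the object pointers itself, so a single-object round-trip looks like
the following sketch. The function name is illustrative; the tests below
omit the NULL checks for brevity.

/* Editorial sketch, not part of the patch: one zero-copy get/put cycle,
 * with cache and mp as in the test function.
 */
static void
zc_roundtrip_sketch(struct rte_mempool *mp, struct rte_mempool_cache *cache)
{
	void *obj;
	void **slot;

	/* zero-copy get: returns a pointer into the cache; copy the object out */
	slot = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
	if (slot == NULL)
		return;
	obj = *slot;

	/* zero-copy put: reserves a cache slot; copy the object pointer back in */
	slot = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
	if (slot != NULL)
		rte_memcpy(slot, &obj, sizeof(void *));
}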
diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 8e493eda47..a635a514a7 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -72,6 +72,122 @@ my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
 	*objnum = i;
 }
 
+/* basic tests with zero-copy APIs (done on one core) */
+static int
+test_mempool_basic_zc_api(struct rte_mempool *mp, int use_external_cache)
+{
+	uint32_t *objnum;
+	void **objtable;
+	void *obj, *obj2;
+	char *obj_data;
+	int ret = 0;
+	unsigned int i, j;
+	int offset;
+	struct rte_mempool_cache *cache;
+	void **cache_objs;
+
+	if (use_external_cache) {
+		/* Create a user-owned mempool cache. */
+		cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+						 SOCKET_ID_ANY);
+		if (cache == NULL)
+			RET_ERR();
+	} else {
+		/* May be NULL if cache is disabled. */
+		cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	}
+
+	/* dump the mempool status */
+	rte_mempool_dump(stdout, mp);
+
+	printf("get an object\n");
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+	obj = *cache_objs;
+	rte_mempool_dump(stdout, mp);
+
+	/* tests that improve coverage */
+	printf("get object count\n");
+	/* We have to count the extra caches, one in this case. */
+	offset = use_external_cache ? 1 * cache->len : 0;
+	if (rte_mempool_avail_count(mp) + offset != MEMPOOL_SIZE - 1)
+		GOTO_ERR(ret, out);
+
+	printf("get private data\n");
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			RTE_MEMPOOL_HEADER_SIZE(mp, mp->cache_size))
+		GOTO_ERR(ret, out);
+
+#ifndef RTE_EXEC_ENV_FREEBSD /* rte_mem_virt2iova() not supported on bsd */
+	printf("get physical address of an object\n");
+	if (rte_mempool_virt2iova(obj) != rte_mem_virt2iova(obj))
+		GOTO_ERR(ret, out);
+#endif
+
+	printf("put the object back\n");
+	cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+	rte_memcpy(cache_objs, &obj, sizeof(void *));
+	rte_mempool_dump(stdout, mp);
+
+	printf("get 2 objects\n");
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+	obj = *cache_objs;
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+	obj2 = *cache_objs;
+	rte_mempool_dump(stdout, mp);
+
+	printf("put the objects back\n");
+	cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+	rte_memcpy(cache_objs, &obj, sizeof(void *));
+	cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+	rte_memcpy(cache_objs, &obj2, sizeof(void *));
+	rte_mempool_dump(stdout, mp);
+
+	/*
+	 * get many objects: we cannot get them all because the cache
+	 * on other cores may not be empty.
+	 */
+	objtable = malloc(MEMPOOL_SIZE * sizeof(void *));
+	if (objtable == NULL)
+		GOTO_ERR(ret, out);
+
+	for (i = 0; i < MEMPOOL_SIZE; i++) {
+		cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+		objtable[i] = *cache_objs;
+	}
+
+	/*
+	 * for each object, check that its content was not modified,
+	 * and put objects back in pool
+	 */
+	while (i--) {
+		obj = objtable[i];
+		obj_data = obj;
+		objnum = obj;
+		if (*objnum > MEMPOOL_SIZE) {
+			printf("bad object number(%d)\n", *objnum);
+			ret = -1;
+			break;
+		}
+		for (j = sizeof(*objnum); j < mp->elt_size; j++) {
+			if (obj_data[j] != 0)
+				ret = -1;
+		}
+		cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+		rte_memcpy(cache_objs, &objtable[i], sizeof(void *));
+	}
+
+	free(objtable);
+	if (ret == -1)
+		printf("objects were modified!\n");
+
+out:
+	if (use_external_cache) {
+		rte_mempool_cache_flush(cache, mp);
+		rte_mempool_cache_free(cache);
+	}
+
+	return ret;
+}
+
 /* basic tests (done on one core) */
 static int
 test_mempool_basic(struct rte_mempool *mp, int use_external_cache)
@@ -982,8 +1098,12 @@ test_mempool(void)
 	if (test_mempool_basic(mp_nocache, 0) < 0)
 		GOTO_ERR(ret, err);
 
-	/* basic tests with cache */
-	if (test_mempool_basic(mp_cache, 0) < 0)
+	/* basic tests with zero-copy APIs */
+	if (test_mempool_basic_zc_api(mp_cache, 0) < 0)
+		GOTO_ERR(ret, err);
+
+	/* basic tests with user-owned cache and zero-copy APIs */
+	if (test_mempool_basic_zc_api(mp_nocache, 1) < 0)
 		GOTO_ERR(ret, err);
 
 	/* basic tests with user-owned cache */
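For completeness: the new cases are wired into the existing mempool_autotest,
so with a typical meson build they should be reachable with something like
echo mempool_autotest | ./build/app/test/dpdk-test (the exact path depends on
the build directory).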