[v6,2/4] net/i40e: implement mbufs recycle mode

Message ID 20230525094541.331338-3-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series: Recycle mbufs from Tx queue to Rx queue

Checks

Context | Check | Description
ci/checkpatch | success | coding style OK

Commit Message

Feifei Wang May 25, 2023, 9:45 a.m. UTC
  Define the specific function implementations for the i40e driver.
Currently, mbufs recycle mode supports the 128-bit vector path and
the AVX2 path, and it can be enabled in both fast-free and
non-fast-free mode.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 drivers/net/i40e/i40e_ethdev.c                |   1 +
 drivers/net/i40e/i40e_ethdev.h                |   2 +
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 140 ++++++++++++++++++
 drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
 drivers/net/i40e/i40e_rxtx.h                  |   4 +
 drivers/net/i40e/meson.build                  |   2 +
 6 files changed, 181 insertions(+)
 create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
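
For context on how this per-driver support is consumed, below is a minimal sketch of an application forwarding loop driving the recycle path. It assumes the ethdev-level API added elsewhere in this series; the names used here follow what was eventually merged into DPDK (rte_eth_recycle_rx_queue_info_get() and rte_eth_recycle_mbufs()) and may differ in this v6 revision. Return-value checks and error handling are simplified.

/*
 * Minimal sketch only: forwarding loop using mbufs recycle mode.
 * The recycle API names are assumptions based on the merged ethdev API,
 * not taken from this patch.
 */
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST 32

static void
forward_loop(uint16_t rx_port, uint16_t rx_q, uint16_t tx_port, uint16_t tx_q)
{
	struct rte_eth_recycle_rxq_info recycle_rxq_info;
	struct rte_mbuf *pkts[BURST];
	uint16_t nb_rx, nb_tx;

	/* Query the Rx queue once; the PMD fills in the mbuf ring, mempool,
	 * refill head/tail pointers and the refill requirement.
	 */
	rte_eth_recycle_rx_queue_info_get(rx_port, rx_q, &recycle_rxq_info);

	for (;;) {
		/* Move freeable mbufs from the Tx queue's sw_ring straight
		 * into the Rx queue's sw_ring, then refill Rx descriptors.
		 */
		rte_eth_recycle_mbufs(rx_port, rx_q, tx_port, tx_q,
				      &recycle_rxq_info);

		nb_rx = rte_eth_rx_burst(rx_port, rx_q, pkts, BURST);
		if (nb_rx == 0)
			continue;

		nb_tx = rte_eth_tx_burst(tx_port, tx_q, pkts, nb_rx);
		/* Drop whatever could not be transmitted. */
		while (nb_tx < nb_rx)
			rte_pktmbuf_free(pkts[nb_tx++]);
	}
}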
  

Comments

Feifei Wang June 6, 2023, 3:16 a.m. UTC | #1
From: Константин Ананьев <konstantin.v.ananyev@yandex.ru>
Sent: Monday, June 5, 2023 9:03 PM
To: Feifei Wang <Feifei.Wang2@arm.com>; Yuying Zhang <yuying.zhang@intel.com>; Beilei Xing <beilei.xing@intel.com>
Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
Subject: Re: [PATCH v6 2/4] net/i40e: implement mbufs recycle mode

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index f9d8f9791f..d4eecd16cf 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
         .flow_ops_get = i40e_dev_flow_ops_get,
         .rxq_info_get = i40e_rxq_info_get,
         .txq_info_get = i40e_txq_info_get,
+ .recycle_rxq_info_get = i40e_recycle_rxq_info_get,
         .rx_burst_mode_get = i40e_rx_burst_mode_get,
         .tx_burst_mode_get = i40e_tx_burst_mode_get,
         .timesync_enable = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 9b806d130e..b5b2d6cf2b 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
         struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
         struct rte_eth_txq_info *qinfo);
+void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
                            struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
new file mode 100644
index 0000000000..08d708fd7d
--- /dev/null
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Arm Limited.
+ */
+
+#include <stdint.h>
+#include <ethdev_driver.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
+{
+ struct i40e_rx_queue *rxq = rx_queue;
+ struct i40e_rx_entry *rxep;
+ volatile union i40e_rx_desc *rxdp;
+ uint16_t rx_id;
+ uint64_t paddr;
+ uint64_t dma_addr;
+ uint16_t i;
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+ rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ for (i = 0; i < nb_mbufs; i++) {
+ /* Initialize rxdp descs. */
+ paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+ dma_addr = rte_cpu_to_le_64(paddr);
+ /* flush desc with pa dma_addr */
+ rxdp[i].read.hdr_addr = 0;
+ rxdp[i].read.pkt_addr = dma_addr;
+ }
+
+ /* Update the descriptor initializer index */
+ rxq->rxrearm_start += nb_mbufs;
+ rx_id = rxq->rxrearm_start - 1;
+
+ if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+ rxq->rxrearm_start = 0;
+ rx_id = rxq->nb_rx_desc - 1;
+ }
+
+ rxq->rxrearm_nb -= nb_mbufs;
+
+ rte_io_wmb();
+ /* Update the tail pointer on the NIC */
+ I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+}
+
+uint16_t
+i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct i40e_tx_queue *txq = tx_queue;
+ struct i40e_tx_entry *txep;
+ struct rte_mbuf **rxep;
+ struct rte_mbuf *m[RTE_I40E_TX_MAX_FREE_BUF_SZ];
+ int i, j, n;
+ uint16_t avail = 0;
+ uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+ uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+ uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+ uint16_t refill_head = *recycle_rxq_info->refill_head;
+ uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+ /* Get available recycling Rx buffers. */
+ avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+ /* Check Tx free thresh and Rx available space. */
+ if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+ return 0;
+
+ /* check DD bits on threshold descriptor */
+ if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+ rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+ rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ return 0;
+
+ n = txq->tx_rs_thresh;
+
+ /* Mbufs recycle mode can only support no ring buffer wrapping around.
+ * Two case for this:
+ *
+ * case 1: The refill head of Rx buffer ring needs to be aligned with
+ * mbuf ring size. In this case, the number of Tx freeing buffers
+ * should be equal to refill_requirement.
+ *
+ * case 2: The refill head of Rx ring buffer does not need to be aligned
+ * with mbuf ring size. In this case, the update of refill head can not
+ * exceed the Rx mbuf ring size.
+ */
+ if (refill_requirement != n ||
+ (!refill_requirement && (refill_head + n > mbuf_ring_size)))
+ return 0;
+
+ /* First buffer to free from S/W ring is at index
+ * tx_next_dd - (tx_rs_thresh-1).
+ */
+ txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+ rxep = recycle_rxq_info->mbuf_ring;
+ rxep += refill_head;
+
+ if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+ /* Directly put mbufs from Tx to Rx. */
+ for (i = 0; i < n; i++, rxep++, txep++)
+ *rxep = txep[0].mbuf;
+ } else {
+ for (i = 0, j = 0; i < n; i++) {
+ /* Avoid txq contains buffers from expected mempool. */
+ if (unlikely(recycle_rxq_info->mp
+ != txep[i].mbuf->pool))
+ return 0;
I don't think that it is possible to simply return 0 here:
we might already have some mbufs inside rxep[], so we probably need
to return that number (j).

No, this is just a pre-free; it does not actually put the mbufs into rxep.
After the loop has run, we call rte_memcpy to actually copy the
mbufs into rxep.

+
+ m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+ /* In case 1, each of Tx buffers should be the
+ * last reference.
+ */
+ if (unlikely(m[j] == NULL && refill_requirement))
+ return 0;

same here, we can't simply return 0, it will introduce mbuf leakage.

+ /* In case 2, the number of valid Tx free
+ * buffers should be recorded.
+ */
+ j++;
+ }
+ rte_memcpy(rxep, m, sizeof(void *) * j);
Wonder why do you need intermediate buffer for released mbufs?
Why can't just:
...
m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
...
rxep[j++] = m;
?
Might save you few extra cycles.
Sometimes ‘rte_pktmbuf_prefree_seg’ can return NULL because
mbuf->refcnt > 1. So we should first ensure that all ‘m’ are valid and
then copy them into rxep.

+ }
+
+ /* Update counters for Tx. */
+ txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+ txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+ if (txq->tx_next_dd >= txq->nb_tx_desc)
+ txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+ return n;
+}
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 788ffb51c2..53cf787f04 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3197,6 +3197,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
         qinfo->conf.offloads = txq->offloads;
 }

+void
+i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct i40e_rx_queue *rxq;
+ struct i40e_adapter *ad =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+ rxq = dev->data->rx_queues[queue_id];
+
+ recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
+ recycle_rxq_info->mp = rxq->mp;
+ recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
+ recycle_rxq_info->receive_tail = &rxq->rx_tail;
+
+ if (ad->rx_vec_allowed) {
+ recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
+ recycle_rxq_info->refill_head = &rxq->rxrearm_start;
+ } else {
+ recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
+ recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
+ }
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3291,6 +3315,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                 dev->rx_pkt_burst = ad->rx_use_avx2 ?
                                         i40e_recv_scattered_pkts_vec_avx2 :
                                         i40e_recv_scattered_pkts_vec;
+ dev->recycle_rx_descriptors_refill =
+ i40e_recycle_rx_descriptors_refill_vec;
                         }
                 } else {
                         if (ad->rx_use_avx512) {
@@ -3309,9 +3335,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                                 dev->rx_pkt_burst = ad->rx_use_avx2 ?
                                         i40e_recv_pkts_vec_avx2 :
                                         i40e_recv_pkts_vec;
+ dev->recycle_rx_descriptors_refill =
+ i40e_recycle_rx_descriptors_refill_vec;
                         }
                 }
 #else /* RTE_ARCH_X86 */
+ dev->recycle_rx_descriptors_refill = i40e_recycle_rx_descriptors_refill_vec;
                 if (dev->data->scattered_rx) {
                         PMD_INIT_LOG(DEBUG,
                                      "Using Vector Scattered Rx (port %d).",
@@ -3479,15 +3508,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
                                 dev->tx_pkt_burst = ad->tx_use_avx2 ?
                                                     i40e_xmit_pkts_vec_avx2 :
                                                     i40e_xmit_pkts_vec;
+ dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
                         }
 #else /* RTE_ARCH_X86 */
                         PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
                                      dev->data->port_id);
                         dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+ dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
                 } else {
                         PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                         dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+ dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
                 }
                 dev->tx_pkt_prepare = i40e_simple_prep_pkts;
         } else {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..ed8921ddc0 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -233,6 +233,10 @@ uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
 int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);

+uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+void i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs);
+
 uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                             uint16_t nb_pkts);
 uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
index 8e53b87a65..58eb627abc 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -42,6 +42,8 @@ testpmd_sources = files('i40e_testpmd.c')
 deps += ['hash']
 includes += include_directories('base')

+sources += files('i40e_recycle_mbufs_vec_common.c')
+
 if arch_subdir == 'x86'
     sources += files('i40e_rxtx_vec_sse.c')

--
2.25.1
  
Konstantin Ananyev June 6, 2023, 7:18 a.m. UTC | #2
> + if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> + /* Directly put mbufs from Tx to Rx. */
> + for (i = 0; i < n; i++, rxep++, txep++)
> + *rxep = txep[0].mbuf;
> + } else {
> + for (i = 0, j = 0; i < n; i++) {
> + /* Avoid txq contains buffers from expected mempool. */
> + if (unlikely(recycle_rxq_info->mp
> + != txep[i].mbuf->pool))
> + return 0;
> I don't think that it is possible to simply return 0 here:
> we might already have some mbufs inside rxep[], so we probably need
> to return that number (j).
> 
> No, here is just pre-free, not actually put mbufs into rxeq.
 
I understand that.
What I am saying: after you call pktmbuf_prefree_seg(mbuf),
you can’t keep it in the txq anymore.
You either have to put it into rxep[] or return it to the mempool.
Also, the txq state (nb_tx_free, etc.) needs to be updated.

> After run out of the loop, we call rte_memcpy to actually copy
> mbufs into rxep.
> +
> + m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +
> + /* In case 1, each of Tx buffers should be the
> + * last reference.
> + */
> + if (unlikely(m[j] == NULL && refill_requirement))
> + return 0;
> 
> same here, we can't simply return 0, it will introduce mbuf leakage.
> + /* In case 2, the number of valid Tx free
> + * buffers should be recorded.
> + */
> + j++;
> + }
> + rte_memcpy(rxep, m, sizeof(void *) * j);
> Wonder why do you need intermediate buffer for released mbufs?
> Why can't just:
> ...
> m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> ...
> rxep[j++] = m;
> ?
> Might save you few extra cycles.
> Sometimes ‘rte_pktmbuf_prefree_seg’ can return NULL due to
> mbuf->refcnt > 1. So we should firstly ensure all ‘m’ are valid and
> then copy them into rxep.

I understand that, but you can check whether it is NULL or not.
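
To make this point concrete, here is a minimal sketch (not part of the patch; 'txep' and 'rxep' stand in for the driver's Tx sw_ring entries and the Rx mbuf ring) of why a buffer cannot stay behind in the Tx queue once rte_pktmbuf_prefree_seg() has been called on it:

/* Sketch only: illustrates the rte_pktmbuf_prefree_seg() semantics
 * discussed above, using the public rte_mbuf API.
 */
#include <rte_mbuf.h>

static inline uint16_t
recycle_one_batch(struct rte_mbuf **txep, struct rte_mbuf **rxep, uint16_t n)
{
	uint16_t i, j = 0;
	struct rte_mbuf *m;

	for (i = 0; i < n; i++) {
		m = rte_pktmbuf_prefree_seg(txep[i]);
		if (m != NULL) {
			/* We now hold the last reference: the mbuf has to go
			 * into the Rx ring (or back to its mempool); it must
			 * not simply be left in the Tx sw_ring.
			 */
			rxep[j++] = m;
		} else {
			/* refcnt was > 1: prefree already dropped this
			 * queue's reference, so the entry must not be
			 * freed again from the Tx path later on.
			 */
			txep[i] = NULL;
		}
	}
	/* The caller still has to advance nb_tx_free/tx_next_dd for the
	 * whole batch, whatever j turned out to be.
	 */
	return j;
}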

  
Feifei Wang June 6, 2023, 7:58 a.m. UTC | #3
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Tuesday, June 6, 2023 3:18 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Константин Ананьев
> <konstantin.v.ananyev@yandex.ru>; Yuying Zhang
> <yuying.zhang@intel.com>; Beilei Xing <beilei.xing@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v6 2/4] net/i40e: implement mbufs recycle mode
> 
> > + m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +
> > + /* In case 1, each of Tx buffers should be the
> > + * last reference.
> > + */
> > + if (unlikely(m[j] == NULL && refill_requirement)) return 0;
> >
> > same here, we can't simply return 0, it will introduce mbuf leakage.
> > + /* In case 2, the number of valid Tx free
> > + * buffers should be recorded.
> > + */
> > + j++;
> > + }
> > + rte_memcpy(rxep, m, sizeof(void *) * j);
> > Wonder why do you need intermediate buffer for released mbufs?
> > Why can't just:
> > ...
> > m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > ...
> > rxep[j++] = m;
> > ?
> > Might save you few extra cycles.
> > Sometimes ‘rte_pktmbuf_prefree_seg’ can return NULL due to
> > mbuf->refcnt > 1. So we should firstly ensure all ‘m’ are valid and
> > then copy them into rxep.
> 
> I understand that, but you can check is it NULL or not.
For the i40e rxq, it must rearm 'RTE_I40E_RXQ_REARM_THRESH' pkts at a time
because of its ring wrapping mechanism.

For the i40e txq, it must free 'txq->tx_rs_thresh' pkts at a time.

So we first need to ensure that all Tx free mbufs are valid, and only then copy them into the rxq.
If there are not enough valid mbufs, the rxq's ring wrapping mechanism would be broken.

> 
> > + }
> > +
> > + /* Update counters for Tx. */
> > + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > + txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > + if (txq->tx_next_dd >= txq->nb_tx_desc)
> > + txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > +
> > + return n;
> > +}
  
Konstantin Ananyev June 6, 2023, 8:27 a.m. UTC | #4
> > > + m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > +
> > > + /* In case 1, each of Tx buffers should be the
> > > + * last reference.
> > > + */
> > > + if (unlikely(m[j] == NULL && refill_requirement)) return 0;
> > >
> > > same here, we can't simply return 0, it will introduce mbuf leakage.
> > > + /* In case 2, the number of valid Tx free
> > > + * buffers should be recorded.
> > > + */
> > > + j++;
> > > + }
> > > + rte_memcpy(rxep, m, sizeof(void *) * j);
> > > Wonder why do you need intermediate buffer for released mbufs?
> > > Why can't just:
> > > ...
> > > m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > ...
> > > rxep[j++] = m;
> > > ?
> > > Might save you few extra cycles.
> > > Sometimes ‘rte_pktmbuf_prefree_seg’ can return NULL due to
> > > mbuf->refcnt > 1. So we should firstly ensure all ‘m’ are valid and
> > > then copy them into rxep.
> >
> > I understand that, but you can check is it NULL or not.
> For i40e rxq, it must rearm ' RTE_I40E_RXQ_REARM_THRESH ' pkts once a time
> based on its ring wrapping mechanism.
> 
> For i40e txq, it must free ' txq->tx_rs_thresh' pkts once a time.
> 
> So we need firstly ensure all tx free mbufs are valid, and then copy these into rxq.
> If not enough valid mbufs, it will break rxq's ring wrapping mechanism.
 
I think you can still copy the mbufs into rxep[]; if there are not enough valid mbufs,
you can still return 0 (or whatever the proper value is here), and that would mean
all these new rxep[] entries are considered invalid.
Anyway, that's just a suggestion to avoid the extra copy.

> 
> >
> > > + }
> > > +
> > > + /* Update counters for Tx. */
> > > + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > > + txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > > + if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > + txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > +
> > > + return n;
> > > +}
  
Feifei Wang June 12, 2023, 3:05 a.m. UTC | #5
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Tuesday, June 6, 2023 4:27 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Константин Ананьев
> <konstantin.v.ananyev@yandex.ru>; Yuying Zhang
> <yuying.zhang@intel.com>; Beilei Xing <beilei.xing@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v6 2/4] net/i40e: implement mbufs recycle mode
> 
> > For i40e rxq, it must rearm ' RTE_I40E_RXQ_REARM_THRESH ' pkts once a
> > time based on its ring wrapping mechanism.
> >
> > For i40e txq, it must free ' txq->tx_rs_thresh' pkts once a time.
> >
> > So we need firstly ensure all tx free mbufs are valid, and then copy these into
> rxq.
> > If not enough valid mbufs, it will break rxq's ring wrapping mechanism.
> 
> I think you can still copy mbufs into rxep[], if there are not enough mbufs, you
> can still return 0 (or whatever is a proper value here), and that would mean all
> these new rxep[] entries will be considered as invalid.
> Anyway that's just a suggestion to avoid extra copy.

If I understand correctly, you mean we can copy the mbufs into rxep first.
If an invalid buffer is found, the previously copied buffers are also considered invalid.

Thus, this saves CPU cycles in the common, correct case, and only in the rare case
do we need to give up the already-copied rxep[] buffers.

That's a good comment; I agree that we can do it like this.
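
A minimal sketch of the direction agreed on here (this is not the v6 code above): write the prefree'd mbufs straight into rxep[] and give up on the first buffer that cannot be recycled, so the intermediate m[] array disappears. On the give-up path the mbufs that were already prefree'd still have to be dealt with, as noted earlier in this thread; that handling is elided. 'txep' and 'rxep' are stand-ins for the driver rings.

/* Sketch only, not the v6 patch: direct copy into rxep[], no m[] array. */
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static inline uint16_t
recycle_direct(struct rte_mbuf **txep, struct rte_mbuf **rxep,
		struct rte_mempool *mp, uint16_t n)
{
	uint16_t i;
	struct rte_mbuf *m;

	for (i = 0; i < n; i++) {
		/* Only mbufs coming from the Rx queue's mempool can be
		 * recycled into it.
		 */
		if (unlikely(txep[i]->pool != mp))
			return 0;

		m = rte_pktmbuf_prefree_seg(txep[i]);
		if (unlikely(m == NULL))
			return 0;	/* still referenced elsewhere */

		/* Copy directly into the Rx mbuf ring; if a later iteration
		 * gives up, the caller simply ignores these entries.
		 */
		rxep[i] = m;
	}
	return n;
}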
  

Patch

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index f9d8f9791f..d4eecd16cf 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@  static const struct eth_dev_ops i40e_eth_dev_ops = {
 	.flow_ops_get                 = i40e_dev_flow_ops_get,
 	.rxq_info_get                 = i40e_rxq_info_get,
 	.txq_info_get                 = i40e_txq_info_get,
+	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
 	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
 	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
 	.timesync_enable              = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 9b806d130e..b5b2d6cf2b 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@  void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
+void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
 			   struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
new file mode 100644
index 0000000000..08d708fd7d
--- /dev/null
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -0,0 +1,140 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Arm Limited.
+ */
+
+#include <stdint.h>
+#include <ethdev_driver.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	struct i40e_rx_entry *rxep;
+	volatile union i40e_rx_desc *rxdp;
+	uint16_t rx_id;
+	uint64_t paddr;
+	uint64_t dma_addr;
+	uint16_t i;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		/* Initialize rxdp descs. */
+		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr = rte_cpu_to_le_64(paddr);
+		/* flush desc with pa dma_addr */
+		rxdp[i].read.hdr_addr = 0;
+		rxdp[i].read.pkt_addr = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+}
+
+uint16_t
+i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	struct i40e_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *m[RTE_I40E_TX_MAX_FREE_BUF_SZ];
+	int i, j, n;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	/* check DD bits on threshold descriptor */
+	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+		return 0;
+
+	n = txq->tx_rs_thresh;
+
+	/* Mbufs recycle mode can only support no ring buffer wrapping around.
+	 * Two case for this:
+	 *
+	 * case 1: The refill head of Rx buffer ring needs to be aligned with
+	 * mbuf ring size. In this case, the number of Tx freeing buffers
+	 * should be equal to refill_requirement.
+	 *
+	 * case 2: The refill head of Rx ring buffer does not need to be aligned
+	 * with mbuf ring size. In this case, the update of refill head can not
+	 * exceed the Rx mbuf ring size.
+	 */
+	if (refill_requirement != n ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++, rxep++, txep++)
+			*rxep = txep[0].mbuf;
+	} else {
+		for (i = 0, j = 0; i < n; i++) {
+			/* Avoid txq contains buffers from expected mempool. */
+			if (unlikely(recycle_rxq_info->mp
+						!= txep[i].mbuf->pool))
+				return 0;
+
+			m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* In case 1, each of Tx buffers should be the
+			 * last reference.
+			 */
+			if (unlikely(m[j] == NULL && refill_requirement))
+				return 0;
+			/* In case 2, the number of valid Tx free
+			 * buffers should be recorded.
+			 */
+			j++;
+		}
+		rte_memcpy(rxep, m, sizeof(void *) * j);
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return n;
+}
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 788ffb51c2..53cf787f04 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3197,6 +3197,30 @@  i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.offloads = txq->offloads;
 }
 
+void
+i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct i40e_rx_queue *rxq;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+	rxq = dev->data->rx_queues[queue_id];
+
+	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
+	recycle_rxq_info->mp = rxq->mp;
+	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
+	recycle_rxq_info->receive_tail = &rxq->rx_tail;
+
+	if (ad->rx_vec_allowed) {
+		recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
+		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
+	} else {
+		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
+		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
+	}
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3291,6 +3315,8 @@  i40e_set_rx_function(struct rte_eth_dev *dev)
 				dev->rx_pkt_burst = ad->rx_use_avx2 ?
 					i40e_recv_scattered_pkts_vec_avx2 :
 					i40e_recv_scattered_pkts_vec;
+				dev->recycle_rx_descriptors_refill =
+					i40e_recycle_rx_descriptors_refill_vec;
 			}
 		} else {
 			if (ad->rx_use_avx512) {
@@ -3309,9 +3335,12 @@  i40e_set_rx_function(struct rte_eth_dev *dev)
 				dev->rx_pkt_burst = ad->rx_use_avx2 ?
 					i40e_recv_pkts_vec_avx2 :
 					i40e_recv_pkts_vec;
+				dev->recycle_rx_descriptors_refill =
+					i40e_recycle_rx_descriptors_refill_vec;
 			}
 		}
 #else /* RTE_ARCH_X86 */
+		dev->recycle_rx_descriptors_refill = i40e_recycle_rx_descriptors_refill_vec;
 		if (dev->data->scattered_rx) {
 			PMD_INIT_LOG(DEBUG,
 				     "Using Vector Scattered Rx (port %d).",
@@ -3479,15 +3508,18 @@  i40e_set_tx_function(struct rte_eth_dev *dev)
 				dev->tx_pkt_burst = ad->tx_use_avx2 ?
 						    i40e_xmit_pkts_vec_avx2 :
 						    i40e_xmit_pkts_vec;
+				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
 			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
 			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..ed8921ddc0 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -233,6 +233,10 @@  uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
 int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
 
+uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+void i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs);
+
 uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts);
 uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
index 8e53b87a65..58eb627abc 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -42,6 +42,8 @@  testpmd_sources = files('i40e_testpmd.c')
 deps += ['hash']
 includes += include_directories('base')
 
+sources += files('i40e_recycle_mbufs_vec_common.c')
+
 if arch_subdir == 'x86'
     sources += files('i40e_rxtx_vec_sse.c')