[v6,1/4] ethdev: add API for mbufs recycle mode

Message ID 20230525094541.331338-2-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series Recycle mbufs from Tx queue to Rx queue

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Feifei Wang May 25, 2023, 9:45 a.m. UTC
  Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
APIs to recycle used mbufs from a transmit queue of an Ethernet device,
and move these mbufs into a mbuf ring for a receive queue of an Ethernet
device. This can bypass mempool 'put/get' operations, hence saving CPU
cycles.

When recycling mbufs, the rte_eth_recycle_mbufs() function performs
the following operations:
- Copy used *rte_mbuf* buffer pointers from the Tx mbuf ring into the
Rx mbuf ring.
- Replenish the Rx descriptors with the recycled *rte_mbuf* mbufs freed
from the Tx mbuf ring.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 doc/guides/rel_notes/release_23_07.rst |   7 +
 lib/ethdev/ethdev_driver.h             |  10 ++
 lib/ethdev/ethdev_private.c            |   2 +
 lib/ethdev/rte_ethdev.c                |  31 +++++
 lib/ethdev/rte_ethdev.h                | 182 +++++++++++++++++++++++++
 lib/ethdev/rte_ethdev_core.h           |  15 +-
 lib/ethdev/version.map                 |   4 +
 7 files changed, 249 insertions(+), 2 deletions(-)
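
As a minimal sketch of the intended call pattern (not part of the patch; the
fwd_loop wrapper, port/queue numbers and burst size are illustrative, and the
build needs ALLOW_EXPERIMENTAL_API since both functions are experimental), an
application forwarding packets from port 0 to port 1 on queue 0 could drive
the new APIs like this:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
fwd_loop(void)
{
	struct rte_eth_recycle_rxq_info rxq_info;
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, nb_tx;

	/* One-time setup: retrieve the mbuf-ring information of the chosen
	 * Rx queue; fails if the Rx PMD does not support recycling.
	 */
	if (rte_eth_recycle_rx_queue_info_get(0, 0, &rxq_info) != 0)
		return;

	for (;;) {
		/* Move used mbufs from Tx port 1/queue 0 straight into the
		 * mbuf ring of Rx port 0/queue 0, bypassing the mempool.
		 */
		rte_eth_recycle_mbufs(0, 0, 1, 0, &rxq_info);

		nb_rx = rte_eth_rx_burst(0, 0, pkts, RTE_DIM(pkts));
		nb_tx = rte_eth_tx_burst(1, 0, pkts, nb_rx);

		/* Drop whatever the Tx queue did not accept. */
		while (nb_tx < nb_rx)
			rte_pktmbuf_free(pkts[nb_tx++]);
	}
}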
  

Comments

Morten Brørup May 25, 2023, 3:08 p.m. UTC | #1
> From: Feifei Wang [mailto:feifei.wang2@arm.com]
> Sent: Thursday, 25 May 2023 11.46
> 
> Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
> APIs to recycle used mbufs from a transmit queue of an Ethernet device,
> and move these mbufs into a mbuf ring for a receive queue of an Ethernet
> device. This can bypass mempool 'put/get' operations hence saving CPU
> cycles.
> 
> When recycling mbufs, the rte_eth_recycle_mbufs() function performs
> the following operations:
> - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf
> ring.
> - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
> from the Tx mbuf ring.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---

[...]

> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 2c9d615fb5..c6723d5277 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -59,6 +59,10 @@ struct rte_eth_dev {
>  	eth_rx_descriptor_status_t rx_descriptor_status;
>  	/** Check the status of a Tx descriptor */
>  	eth_tx_descriptor_status_t tx_descriptor_status;
> +	/** Pointer to PMD transmit mbufs reuse function */
> +	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
> +	/** Pointer to PMD receive descriptors refill function */
> +	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> 
>  	/**
>  	 * Device data that is shared between primary and secondary processes

The rte_eth_dev struct currently looks like this:

/**
 * @internal
 * The generic data structure associated with each Ethernet device.
 *
 * Pointers to burst-oriented packet receive and transmit functions are
 * located at the beginning of the structure, along with the pointer to
 * where all the data elements for the particular device are stored in shared
 * memory. This split allows the function pointer and driver data to be per-
 * process, while the actual configuration data for the device is shared.
 */
struct rte_eth_dev {
	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function */
	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function */

	/** Pointer to PMD transmit prepare function */
	eth_tx_prep_t tx_pkt_prepare;
	/** Get the number of used Rx descriptors */
	eth_rx_queue_count_t rx_queue_count;
	/** Check the status of a Rx descriptor */
	eth_rx_descriptor_status_t rx_descriptor_status;
	/** Check the status of a Tx descriptor */
	eth_tx_descriptor_status_t tx_descriptor_status;

	/**
	 * Device data that is shared between primary and secondary processes
	 */
	struct rte_eth_dev_data *data;
	void *process_private; /**< Pointer to per-process device data */
	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
	struct rte_device *device; /**< Backing device */
	struct rte_intr_handle *intr_handle; /**< Device interrupt handle */

	/** User application callbacks for NIC interrupts */
	struct rte_eth_dev_cb_list link_intr_cbs;
	/**
	 * User-supplied functions called from rx_burst to post-process
	 * received packets before passing them to the user
	 */
	struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
	/**
	 * User-supplied functions called from tx_burst to pre-process
	 * received packets before passing them to the driver for transmission
	 */
	struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];

	enum rte_eth_dev_state state; /**< Flag indicating the port state */
	void *security_ctx; /**< Context for security ops */
} __rte_cache_aligned;

Inserting the two new function pointers (recycle_tx_mbufs_reuse and recycle_rx_descriptors_refill) as the 7th and 8th fields will move the 'data' and 'process_private' pointers out of the first cache line.

If those data pointers are used in the fast path with the rx_pkt_burst and tx_pkt_burst functions, moving them to a different cache line might have a performance impact on those two functions.

Disclaimer: This is a big "if", and wild speculation from me, because I haven't looked at it in detail! If this structure is not used in the fast path like this, you can ignore my suggestion below.

Please consider moving the 'data' and 'process_private' pointers to the beginning of this structure, so they are kept in the same cache line as the rx_pkt_burst and tx_pkt_burst function pointers.

I don't know the relative importance of the remaining six fast path functions (the four existing ones plus the two new ones in this patch), so you could also rearrange those, so the least important two functions are moved out of the first cache line. It doesn't have to be the two recycle functions that go into a different cache line.

-Morten
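
A build-time sanity check for this layout concern could sit next to the
struct definition; a minimal sketch, assuming a 64-byte cache line, 8-byte
pointers and the ethdev_driver.h definitions quoted above:

#include <stddef.h>
#include <rte_config.h>
#include <ethdev_driver.h>

/* Compilation fails if the 'data' pointer is pushed out of the first
 * cache line of struct rte_eth_dev (e.g. by adding two more function
 * pointers in front of it on a 64-bit target).
 */
_Static_assert(offsetof(struct rte_eth_dev, data) < RTE_CACHE_LINE_SIZE,
	"rte_eth_dev.data no longer fits in the first cache line");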
  
Feifei Wang May 31, 2023, 6:10 a.m. UTC | #2
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Thursday, May 25, 2023 11:09 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh
> Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: RE: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode
> 
> > From: Feifei Wang [mailto:feifei.wang2@arm.com]
> > Sent: Thursday, 25 May 2023 11.46
> >
> > Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
> > APIs to recycle used mbufs from a transmit queue of an Ethernet
> > device, and move these mbufs into a mbuf ring for a receive queue of
> > an Ethernet device. This can bypass mempool 'put/get' operations hence
> > saving CPU cycles.
> >
> > When recycling mbufs, the rte_eth_recycle_mbufs() function
> > performs the following operations:
> > - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf
> > ring.
> > - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs
> > freed from the Tx mbuf ring.
> >
> > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> 
> [...]
> 
> > diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> > index 2c9d615fb5..c6723d5277 100644
> > --- a/lib/ethdev/ethdev_driver.h
> > +++ b/lib/ethdev/ethdev_driver.h
> > @@ -59,6 +59,10 @@ struct rte_eth_dev {
> >  	eth_rx_descriptor_status_t rx_descriptor_status;
> >  	/** Check the status of a Tx descriptor */
> >  	eth_tx_descriptor_status_t tx_descriptor_status;
> > +	/** Pointer to PMD transmit mbufs reuse function */
> > +	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
> > +	/** Pointer to PMD receive descriptors refill function */
> > +	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> >
> >  	/**
> >  	 * Device data that is shared between primary and secondary
> > processes
> 
> The rte_eth_dev struct currently looks like this:
> 
> /**
>  * @internal
>  * The generic data structure associated with each Ethernet device.
>  *
>  * Pointers to burst-oriented packet receive and transmit functions are
>  * located at the beginning of the structure, along with the pointer to
>  * where all the data elements for the particular device are stored in shared
>  * memory. This split allows the function pointer and driver data to be per-
>  * process, while the actual configuration data for the device is shared.
>  */
> struct rte_eth_dev {
> 	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function */
> 	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function */
> 
> 	/** Pointer to PMD transmit prepare function */
> 	eth_tx_prep_t tx_pkt_prepare;
> 	/** Get the number of used Rx descriptors */
> 	eth_rx_queue_count_t rx_queue_count;
> 	/** Check the status of a Rx descriptor */
> 	eth_rx_descriptor_status_t rx_descriptor_status;
> 	/** Check the status of a Tx descriptor */
> 	eth_tx_descriptor_status_t tx_descriptor_status;
> 
> 	/**
> 	 * Device data that is shared between primary and secondary
> processes
> 	 */
> 	struct rte_eth_dev_data *data;
> 	void *process_private; /**< Pointer to per-process device data */
> 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD
> */
> 	struct rte_device *device; /**< Backing device */
> 	struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
> 
> 	/** User application callbacks for NIC interrupts */
> 	struct rte_eth_dev_cb_list link_intr_cbs;
> 	/**
> 	 * User-supplied functions called from rx_burst to post-process
> 	 * received packets before passing them to the user
> 	 */
> 	struct rte_eth_rxtx_callback
> *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
> 	/**
> 	 * User-supplied functions called from tx_burst to pre-process
> 	 * received packets before passing them to the driver for transmission
> 	 */
> 	struct rte_eth_rxtx_callback
> *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
> 
> 	enum rte_eth_dev_state state; /**< Flag indicating the port state */
> 	void *security_ctx; /**< Context for security ops */ }
> __rte_cache_aligned;
> 
> Inserting the two new function pointers (recycle_tx_mbufs_reuse and
> recycle_rx_descriptors_refill) as the 7th and 8th fields will move the 'data' and
> 'process_private' pointers out of the first cache line.
> 
> If those data pointers are used in the fast path with the rx_pkt_burst and
> tx_pkt_burst functions, moving them to a different cache line might have a
> performance impact on those two functions.
> 
> Disclaimer: This is a big "if", and wild speculation from me, because I haven't
> looked at it in detail! If this structure is not used in the fast path like this, you
> can ignore my suggestion below.
> 
> Please consider moving the 'data' and 'process_private' pointers to the
> beginning of this structure, so they are kept in the same cache line as the
> rx_pkt_burst and tx_pkt_burst function pointers.
> 
> I don't know the relative importance of the remaining six fast path functions
> (the four existing ones plus the two new ones in this patch), so you could also
> rearrange those, so the least important two functions are moved out of the
> first cache line. It doesn't have to be the two recycle functions that go into a
> different cache line.
> 
> -Morten

This is a good question. Reviewing the code, we find that the pointers used in the fast path
are mapped into the structure 'struct rte_eth_fp_ops *fpo', which ensures all fast-path
pointers stay in the same Rx/Tx cache line:

void
eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
		const struct rte_eth_dev *dev)
{
	fpo->rx_pkt_burst = dev->rx_pkt_burst;
	fpo->tx_pkt_burst = dev->tx_pkt_burst;
	fpo->tx_pkt_prepare = dev->tx_pkt_prepare;
	fpo->rx_queue_count = dev->rx_queue_count;
	fpo->rx_descriptor_status = dev->rx_descriptor_status;
	fpo->tx_descriptor_status = dev->tx_descriptor_status;
	fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
	fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;

	fpo->rxq.data = dev->data->rx_queues;
	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;

	fpo->txq.data = dev->data->tx_queues;
	fpo->txq.clbk = (void **)(uintptr_t)dev->pre_tx_burst_cbs;
}

Besides the rx_queues and tx_queues pointers, which are important for the fast path,
the other members of 'data', as well as 'process_private', are only used in the slow path.
So it is not necessary for those members to be in the first cache line.
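
For reference, this is roughly what the fast path does with those pointers:
rte_eth_rx_burst() reads everything through rte_eth_fp_ops and never touches
'struct rte_eth_dev' (a simplified sketch of the inline function in
rte_ethdev.h; debug checks and Rx callbacks omitted):

static inline uint16_t
rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
		struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
{
	struct rte_eth_fp_ops *p = &rte_eth_fp_ops[port_id];
	void *qd = p->rxq.data[queue_id];	/* per-queue private data */

	return p->rx_pkt_burst(qd, rx_pkts, nb_pkts);
}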
  
Feifei Wang June 6, 2023, 2:55 a.m. UTC | #3
Thanks for the comments.

From: Константин Ананьев <konstantin.v.ananyev@yandex.ru>
Sent: Monday, June 5, 2023 8:54 PM
To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
Subject: Re: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode



Hi Feifei,

few more comments from me, see below.

Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
APIs to recycle used mbufs from a transmit queue of an Ethernet device,
and move these mbufs into a mbuf ring for a receive queue of an Ethernet
device. This can bypass mempool 'put/get' operations hence saving CPU
cycles.

When recycling mbufs, the rte_eth_recycle_mbufs() function performs
the following operations:
- Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf
ring.
- Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
from the Tx mbuf ring.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 doc/guides/rel_notes/release_23_07.rst | 7 +
 lib/ethdev/ethdev_driver.h | 10 ++
 lib/ethdev/ethdev_private.c | 2 +
 lib/ethdev/rte_ethdev.c | 31 +++++
 lib/ethdev/rte_ethdev.h | 182 +++++++++++++++++++++++++
 lib/ethdev/rte_ethdev_core.h | 15 +-
 lib/ethdev/version.map | 4 +
 7 files changed, 249 insertions(+), 2 deletions(-)

diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
index a9b1293689..f279036cb9 100644
--- a/doc/guides/rel_notes/release_23_07.rst
+++ b/doc/guides/rel_notes/release_23_07.rst
@@ -55,6 +55,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================

+* **Add mbufs recycling support.**
+ Added ``rte_eth_recycle_rx_queue_info_get`` and ``rte_eth_recycle_mbufs``
+ APIs which allow the user to copy used mbufs from the Tx mbuf ring
+ into the Rx mbuf ring. This feature supports the case that the Rx Ethernet
+ device is different from the Tx Ethernet device with respective driver
+ callback functions in ``rte_eth_recycle_mbufs``.
+

 Removed Items
 -------------
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 2c9d615fb5..c6723d5277 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -59,6 +59,10 @@ struct rte_eth_dev {
         eth_rx_descriptor_status_t rx_descriptor_status;
         /** Check the status of a Tx descriptor */
         eth_tx_descriptor_status_t tx_descriptor_status;
+ /** Pointer to PMD transmit mbufs reuse function */
+ eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
+ /** Pointer to PMD receive descriptors refill function */
+ eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;

         /**
          * Device data that is shared between primary and secondary processes
@@ -504,6 +508,10 @@ typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
 typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
         uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);

+typedef void (*eth_recycle_rxq_info_get_t)(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
 typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
         uint16_t queue_id, struct rte_eth_burst_mode *mode);

@@ -1247,6 +1255,8 @@ struct eth_dev_ops {
         eth_rxq_info_get_t rxq_info_get;
         /** Retrieve Tx queue information */
         eth_txq_info_get_t txq_info_get;
+ /** Retrieve mbufs recycle Rx queue information */
+ eth_recycle_rxq_info_get_t recycle_rxq_info_get;
         eth_burst_mode_get_t rx_burst_mode_get; /**< Get Rx burst mode */
         eth_burst_mode_get_t tx_burst_mode_get; /**< Get Tx burst mode */
         eth_fw_version_get_t fw_version_get; /**< Get firmware version */
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 14ec8c6ccf..f8ab64f195 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -277,6 +277,8 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
         fpo->rx_queue_count = dev->rx_queue_count;
         fpo->rx_descriptor_status = dev->rx_descriptor_status;
         fpo->tx_descriptor_status = dev->tx_descriptor_status;
+ fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
+ fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;

         fpo->rxq.data = dev->data->rx_queues;
         fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 4d03255683..7c27dcfea4 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5784,6 +5784,37 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
         return 0;
 }

+int
+rte_eth_recycle_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+ dev = &rte_eth_devices[port_id];
+
+ if (queue_id >= dev->data->nb_rx_queues) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
+ return -EINVAL;
+ }
+
+ if (dev->data->rx_queues == NULL ||
+ dev->data->rx_queues[queue_id] == NULL) {
+ RTE_ETHDEV_LOG(ERR,
+ "Rx queue %"PRIu16" of device with port_id=%"
+ PRIu16" has not been setup\n",
+ queue_id, port_id);
+ return -EINVAL;
+ }
+
+ if (*dev->dev_ops->recycle_rxq_info_get == NULL)
+ return -ENOTSUP;
+
+ dev->dev_ops->recycle_rxq_info_get(dev, queue_id, recycle_rxq_info);
+
+ return 0;
+}
+
 int
 rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
                           struct rte_eth_burst_mode *mode)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 99fe9e238b..7434aa2483 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1820,6 +1820,30 @@ struct rte_eth_txq_info {
         uint8_t queue_state; /**< one of RTE_ETH_QUEUE_STATE_*. */
 } __rte_cache_min_aligned;

+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * Ethernet device Rx queue information structure for recycling mbufs.
+ * Used to retrieve Rx queue information when the Tx queue is reusing mbufs
+ * and moving them into the Rx mbuf ring.
+ */
+struct rte_eth_recycle_rxq_info {
+ struct rte_mbuf **mbuf_ring; /**< mbuf ring of Rx queue. */
+ struct rte_mempool *mp; /**< mempool of Rx queue. */
+ uint16_t *refill_head; /**< head of Rx queue refilling mbufs. */
+ uint16_t *receive_tail; /**< tail of Rx queue receiving pkts. */
+ uint16_t mbuf_ring_size; /**< configured number of mbuf ring size. */
+ /**
+ * Requirement on mbuf refilling batch size of Rx mbuf ring.
+ * For some PMD drivers, the number of Rx mbuf ring refilling mbufs
+ * should be aligned with mbuf ring size, in order to simplify
+ * ring wrapping around.
+ * Value 0 means that PMD drivers have no requirement for this.
+ */
+ uint16_t refill_requirement;
+} __rte_cache_min_aligned;
+
 /* Generic Burst mode flag definition, values can be ORed. */

 /**
@@ -4809,6 +4833,31 @@ int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
         struct rte_eth_txq_info *qinfo);

+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Retrieve information about a given port's Rx queue for recycling mbufs.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param queue_id
+ * The Rx queue on the Ethernet device for which information
+ * will be retrieved.
+ * @param recycle_rxq_info
+ * A pointer to a structure of type *rte_eth_recycle_rxq_info* to be filled.
+ *
+ * @return
+ * - 0: Success
+ * - -ENODEV: If *port_id* is invalid.
+ * - -ENOTSUP: routine is not supported by the device PMD.
+ * - -EINVAL: The queue_id is out of range.
+ */
+__rte_experimental
+int rte_eth_recycle_rx_queue_info_get(uint16_t port_id,
+ uint16_t queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
 /**
  * Retrieve information about the Rx packet burst mode.
  *
@@ -6483,6 +6532,139 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
         return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
 }

+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Recycle used mbufs from a transmit queue of an Ethernet device, and move
+ * these mbufs into a mbuf ring for a receive queue of an Ethernet device.
+ * This can bypass mempool path to save CPU cycles.
+ *
+ * The rte_eth_recycle_mbufs() function loops, with rte_eth_rx_burst() and
+ * rte_eth_tx_burst() functions, freeing Tx used mbufs and replenishing Rx
+ * descriptors. The number of recycling mbufs depends on the request of Rx mbuf
+ * ring, with the constraint of enough used mbufs from Tx mbuf ring.
+ *
+ * When recycling mbufs, the rte_eth_recycle_mbufs() function performs the
+ * following operations:
+ *
+ * - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf ring.
+ *
+ * - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
+ * from the Tx mbuf ring.
+ *
+ * This function splits Rx and Tx paths with different callback functions. The
+ * callback function recycle_tx_mbufs_reuse is for Tx driver. The callback
+ * function recycle_rx_descriptors_refill is for Rx driver. rte_eth_recycle_mbufs()
+ * can support the case that Rx Ethernet device is different from Tx Ethernet device.
+ *
+ * It is the responsibility of users to select the Rx/Tx queue pair to recycle
+ * mbufs. Before calling this function, users must call the
+ * rte_eth_recycle_rx_queue_info_get function to retrieve the selected Rx
+ * queue information.
+ * @see rte_eth_recycle_rx_queue_info_get, struct rte_eth_recycle_rxq_info
+ *
+ * Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
+ * between the receive queue and transmit queue. Do not pair one receive queue with
+ * multiple transmit queues or pair one transmit queue with multiple receive queues,
+ * in order to avoid memory error rewriting.
Probably I am missing something, but why it is not possible to do something like that:

rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
....
rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);

I.e. feed one Rx queue from 2 Tx queues?

Two problems with this:

1. If we have 2 Tx queues for one Rx queue, the thread has to make an extra
judgement to decide which one to choose in the driver layer. On the other
hand, the current mechanism lets users switch from one txq to another timely
in the application layer: if the user wants to choose another txq, he just
needs to change the tx_queue_id parameter in the API.

2. If you want one rxq to support two txqs at the same time, this needs a
spinlock on a guard variable to avoid multi-thread conflicts. A spinlock
would decrease the data-path performance greatly. Thus, we do not consider
1 rxq mapping multiple txqs here.

+ *
+ * @param rx_port_id
+ * Port identifying the receive side.
+ * @param rx_queue_id
+ * The index of the receive queue identifying the receive side.
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param tx_port_id
+ * Port identifying the transmit side.
+ * @param tx_queue_id
+ * The index of the transmit queue identifying the transmit side.
+ * The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ * to rte_eth_dev_configure().
+ * @param recycle_rxq_info
+ * A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
+ * the information of the Rx queue mbuf ring.
+ * @return
+ * The number of recycling mbufs.
+ */
+__rte_experimental
+static inline uint16_t
+rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
+ uint16_t tx_port_id, uint16_t tx_queue_id,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+ struct rte_eth_fp_ops *p;
+ void *qd;
+ uint16_t nb_mbufs;
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+ if (tx_port_id >= RTE_MAX_ETHPORTS ||
+ tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid tx_port_id=%u or tx_queue_id=%u\n",
+ tx_port_id, tx_queue_id);
+ return 0;
+ }
+#endif
+
+ /* fetch pointer to queue data */
+ p = &rte_eth_fp_ops[tx_port_id];
+ qd = p->txq.data[tx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
+
+ if (qd == NULL) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
+ tx_queue_id, tx_port_id);
+ return 0;
+ }
+#endif
+ if (p->recycle_tx_mbufs_reuse == NULL)
+ return 0;
+
+ /* Copy used *rte_mbuf* buffer pointers from Tx mbuf ring
+ * into Rx mbuf ring.
+ */
+ nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
+
+ /* If no recycling mbufs, return 0. */
+ if (nb_mbufs == 0)
+ return 0;
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+ if (rx_port_id >= RTE_MAX_ETHPORTS ||
+ rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+ RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
+ rx_port_id, rx_queue_id);
+ return 0;
+ }
+#endif
+
+ /* fetch pointer to queue data */
+ p = &rte_eth_fp_ops[rx_port_id];
+ qd = p->rxq.data[rx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
+
+ if (qd == NULL) {
+ RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
+ rx_queue_id, rx_port_id);
+ return 0;
+ }
+#endif
+
+ if (p->recycle_rx_descriptors_refill == NULL)
+ return 0;
+
+ /* Replenish the Rx descriptors with the recycling
+ * into Rx mbuf ring.
+ */
+ p->recycle_rx_descriptors_refill(qd, nb_mbufs);
+
+ return nb_mbufs;
+}
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index dcf8adab92..a2e6ea6b6c 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
 /** @internal Check the status of a Tx descriptor */
 typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);

+/** @internal Copy used mbufs from Tx mbuf ring into Rx mbuf ring */
+typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
+ struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
+/** @internal Refill Rx descriptors with the recycling mbufs */
+typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
+
 /**
  * @internal
  * Structure used to hold opaque pointers to internal ethdev Rx/Tx
@@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
         eth_rx_queue_count_t rx_queue_count;
         /** Check the status of a Rx descriptor. */
         eth_rx_descriptor_status_t rx_descriptor_status;
+ /** Refill Rx descriptors with the recycling mbufs. */
+ eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
I am afraid we can't put new fields here without ABI breakage.

Agree

It has to be below rxq.
Now thinking about it, the current layout is probably not the best one;
when introducing this struct, I should probably have put rxq either
at the top of the struct, or on the next cache line.
But such a change is not possible right now anyway.
Same story for txq.

Thus we should rearrange the structure like below:

struct rte_eth_fp_ops {
	struct rte_ethdev_qdata rxq;
	eth_rx_burst_t rx_pkt_burst;
	eth_rx_queue_count_t rx_queue_count;
	eth_rx_descriptor_status_t rx_descriptor_status;
	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
	uintptr_t reserved1[2];
};

         /** Rx queues data. */
         struct rte_ethdev_qdata rxq;
- uintptr_t reserved1[3];
+ uintptr_t reserved1[2];
         /**@}*/

         /**@{*/
@@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
         eth_tx_prep_t tx_pkt_prepare;
         /** Check the status of a Tx descriptor. */
         eth_tx_descriptor_status_t tx_descriptor_status;
+ /** Copy used mbufs from Tx mbuf ring into Rx. */
+ eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
         /** Tx queues data. */
         struct rte_ethdev_qdata txq;
- uintptr_t reserved2[3];
+ uintptr_t reserved2[2];
         /**@}*/

 } __rte_cache_aligned;
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 357d1a88c0..45c417f6bd 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -299,6 +299,10 @@ EXPERIMENTAL {
         rte_flow_action_handle_query_update;
         rte_flow_async_action_handle_query_update;
         rte_flow_async_create_by_index;
+
+ # added in 23.07
+ rte_eth_recycle_mbufs;
+ rte_eth_recycle_rx_queue_info_get;
 };

 INTERNAL {
--
2.25.1
  
Konstantin Ananyev June 6, 2023, 7:10 a.m. UTC | #4
> 
> Thanks for the comments.
> 
> From: Константин Ананьев <konstantin.v.ananyev@yandex.ru>
> Sent: Monday, June 5, 2023 8:54 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh Yigit <ferruh.yigit@amd.com>;
> Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: Re: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode
> 
> 
> 
> Hi Feifei,
> 
> few more comments from me, see below.
> Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs'
> APIs to recycle used mbufs from a transmit queue of an Ethernet device,
> and move these mbufs into a mbuf ring for a receive queue of an Ethernet
> device. This can bypass mempool 'put/get' operations hence saving CPU
> cycles.
> 
> When recycling mbufs, the rte_eth_recycle_mbufs() function performs
> the following operations:
> - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf
> ring.
> - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
> from the Tx mbuf ring.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  doc/guides/rel_notes/release_23_07.rst | 7 +
>  lib/ethdev/ethdev_driver.h | 10 ++
>  lib/ethdev/ethdev_private.c | 2 +
>  lib/ethdev/rte_ethdev.c | 31 +++++
>  lib/ethdev/rte_ethdev.h | 182 +++++++++++++++++++++++++
>  lib/ethdev/rte_ethdev_core.h | 15 +-
>  lib/ethdev/version.map | 4 +
>  7 files changed, 249 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
> index a9b1293689..f279036cb9 100644
> --- a/doc/guides/rel_notes/release_23_07.rst
> +++ b/doc/guides/rel_notes/release_23_07.rst
> @@ -55,6 +55,13 @@ New Features
>       Also, make sure to start the actual text at the margin.
>       =======================================================
> 
> +* **Add mbufs recycling support.**
> + Added ``rte_eth_recycle_rx_queue_info_get`` and ``rte_eth_recycle_mbufs``
> + APIs which allow the user to copy used mbufs from the Tx mbuf ring
> + into the Rx mbuf ring. This feature supports the case that the Rx Ethernet
> + device is different from the Tx Ethernet device with respective driver
> + callback functions in ``rte_eth_recycle_mbufs``.
> +
> 
>  Removed Items
>  -------------
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 2c9d615fb5..c6723d5277 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -59,6 +59,10 @@ struct rte_eth_dev {
>          eth_rx_descriptor_status_t rx_descriptor_status;
>          /** Check the status of a Tx descriptor */
>          eth_tx_descriptor_status_t tx_descriptor_status;
> + /** Pointer to PMD transmit mbufs reuse function */
> + eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
> + /** Pointer to PMD receive descriptors refill function */
> + eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> 
>          /**
>           * Device data that is shared between primary and secondary processes
> @@ -504,6 +508,10 @@ typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
>  typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
>          uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
> 
> +typedef void (*eth_recycle_rxq_info_get_t)(struct rte_eth_dev *dev,
> + uint16_t rx_queue_id,
> + struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
>  typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
>          uint16_t queue_id, struct rte_eth_burst_mode *mode);
> 
> @@ -1247,6 +1255,8 @@ struct eth_dev_ops {
>          eth_rxq_info_get_t rxq_info_get;
>          /** Retrieve Tx queue information */
>          eth_txq_info_get_t txq_info_get;
> + /** Retrieve mbufs recycle Rx queue information */
> + eth_recycle_rxq_info_get_t recycle_rxq_info_get;
>          eth_burst_mode_get_t rx_burst_mode_get; /**< Get Rx burst mode */
>          eth_burst_mode_get_t tx_burst_mode_get; /**< Get Tx burst mode */
>          eth_fw_version_get_t fw_version_get; /**< Get firmware version */
> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> index 14ec8c6ccf..f8ab64f195 100644
> --- a/lib/ethdev/ethdev_private.c
> +++ b/lib/ethdev/ethdev_private.c
> @@ -277,6 +277,8 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
>          fpo->rx_queue_count = dev->rx_queue_count;
>          fpo->rx_descriptor_status = dev->rx_descriptor_status;
>          fpo->tx_descriptor_status = dev->tx_descriptor_status;
> + fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
> + fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;
> 
>          fpo->rxq.data = dev->data->rx_queues;
>          fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 4d03255683..7c27dcfea4 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -5784,6 +5784,37 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>          return 0;
>  }
> 
> +int
> +rte_eth_recycle_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
> + struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> + struct rte_eth_dev *dev;
> +
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> + dev = &rte_eth_devices[port_id];
> +
> + if (queue_id >= dev->data->nb_rx_queues) {
> + RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
> + return -EINVAL;
> + }
> +
> + if (dev->data->rx_queues == NULL ||
> + dev->data->rx_queues[queue_id] == NULL) {
> + RTE_ETHDEV_LOG(ERR,
> + "Rx queue %"PRIu16" of device with port_id=%"
> + PRIu16" has not been setup\n",
> + queue_id, port_id);
> + return -EINVAL;
> + }
> +
> + if (*dev->dev_ops->recycle_rxq_info_get == NULL)
> + return -ENOTSUP;
> +
> + dev->dev_ops->recycle_rxq_info_get(dev, queue_id, recycle_rxq_info);
> +
> + return 0;
> +}
> +
>  int
>  rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
>                            struct rte_eth_burst_mode *mode)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index 99fe9e238b..7434aa2483 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1820,6 +1820,30 @@ struct rte_eth_txq_info {
>          uint8_t queue_state; /**< one of RTE_ETH_QUEUE_STATE_*. */
>  } __rte_cache_min_aligned;
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this structure may change without prior notice.
> + *
> + * Ethernet device Rx queue information structure for recycling mbufs.
> + * Used to retrieve Rx queue information when the Tx queue is reusing mbufs
> + * and moving them into the Rx mbuf ring.
> + */
> +struct rte_eth_recycle_rxq_info {
> + struct rte_mbuf **mbuf_ring; /**< mbuf ring of Rx queue. */
> + struct rte_mempool *mp; /**< mempool of Rx queue. */
> + uint16_t *refill_head; /**< head of Rx queue refilling mbufs. */
> + uint16_t *receive_tail; /**< tail of Rx queue receiving pkts. */
> + uint16_t mbuf_ring_size; /**< configured number of mbuf ring size. */
> + /**
> + * Requirement on mbuf refilling batch size of Rx mbuf ring.
> + * For some PMD drivers, the number of Rx mbuf ring refilling mbufs
> + * should be aligned with mbuf ring size, in order to simplify
> + * ring wrapping around.
> + * Value 0 means that PMD drivers have no requirement for this.
> + */
> + uint16_t refill_requirement;
> +} __rte_cache_min_aligned;
> +
>  /* Generic Burst mode flag definition, values can be ORed. */
> 
>  /**
> @@ -4809,6 +4833,31 @@ int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>  int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
>          struct rte_eth_txq_info *qinfo);
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Retrieve information about a given port's Rx queue for recycling mbufs.
> + *
> + * @param port_id
> + * The port identifier of the Ethernet device.
> + * @param queue_id
> + * The Rx queue on the Ethernet device for which information
> + * will be retrieved.
> + * @param recycle_rxq_info
> + * A pointer to a structure of type *rte_eth_recycle_rxq_info* to be filled.
> + *
> + * @return
> + * - 0: Success
> + * - -ENODEV: If *port_id* is invalid.
> + * - -ENOTSUP: routine is not supported by the device PMD.
> + * - -EINVAL: The queue_id is out of range.
> + */
> +__rte_experimental
> +int rte_eth_recycle_rx_queue_info_get(uint16_t port_id,
> + uint16_t queue_id,
> + struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
>  /**
>   * Retrieve information about the Rx packet burst mode.
>   *
> @@ -6483,6 +6532,139 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
>          return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>  }
> 
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Recycle used mbufs from a transmit queue of an Ethernet device, and move
> + * these mbufs into a mbuf ring for a receive queue of an Ethernet device.
> + * This can bypass mempool path to save CPU cycles.
> + *
> + * The rte_eth_recycle_mbufs() function loops, with rte_eth_rx_burst() and
> + * rte_eth_tx_burst() functions, freeing Tx used mbufs and replenishing Rx
> + * descriptors. The number of recycling mbufs depends on the request of Rx mbuf
> + * ring, with the constraint of enough used mbufs from Tx mbuf ring.
> + *
> + * When recycling mbufs, the rte_eth_recycle_mbufs() function performs the
> + * following operations:
> + *
> + * - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf ring.
> + *
> + * - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
> + * from the Tx mbuf ring.
> + *
> + * This function splits Rx and Tx paths with different callback functions. The
> + * callback function recycle_tx_mbufs_reuse is for Tx driver. The callback
> + * function recycle_rx_descriptors_refill is for Rx driver. rte_eth_recycle_mbufs()
> + * can support the case that Rx Ethernet device is different from Tx Ethernet device.
> + *
> + * It is the responsibility of users to select the Rx/Tx queue pair to recycle
> + * mbufs. Before calling this function, users must call the
> + * rte_eth_recycle_rx_queue_info_get function to retrieve the selected Rx
> + * queue information.
> + * @see rte_eth_recycle_rx_queue_info_get, struct rte_eth_recycle_rxq_info
> + *
> + * Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
> + * between the receive queue and transmit queue. Do not pair one receive queue with
> + * multiple transmit queues or pair one transmit queue with multiple receive queues,
> + * in order to avoid memory error rewriting.
> Probably I am missing something, but why it is not possible to do something like that:
> 
> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
> ....
> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);
> 
> I.E. feed rx queue from 2 tx queues?
> 
> Two problems for this:
> 1. If we have 2 tx queues for rx, the thread should make the extra judgement to
> decide which one to choose in the driver layer.

Not sure, why on the driver layer?
The example I gave above: the decision is made at the application layer.
Let's say the first call didn't free enough mbufs, so the app decided to use the second txq for rearm.

> On the other hand, current mechanism can support users to switch 1 txq to another timely
> in the application layer. If user want to choose another txq, he just need to change the txq_queue_id parameter
> in the API.
> 2. If you want one rxq to support two txq at the same time, this needs to add spinlock on guard variable to
> avoid multi-thread conflict. Spinlock will decrease the data-path performance greatly.  Thus, we do not consider
> 1 rxq mapping multiple txqs here.
 
I am talking about a situation where one thread controls 2 tx queues.

> + *
> + * @param rx_port_id
> + * Port identifying the receive side.
> + * @param rx_queue_id
> + * The index of the receive queue identifying the receive side.
> + * The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + * to rte_eth_dev_configure().
> + * @param tx_port_id
> + * Port identifying the transmit side.
> + * @param tx_queue_id
> + * The index of the transmit queue identifying the transmit side.
> + * The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + * to rte_eth_dev_configure().
> + * @param recycle_rxq_info
> + * A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
> + * the information of the Rx queue mbuf ring.
> + * @return
> + * The number of recycling mbufs.
> + */
> +__rte_experimental
> +static inline uint16_t
> +rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
> + uint16_t tx_port_id, uint16_t tx_queue_id,
> + struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> + struct rte_eth_fp_ops *p;
> + void *qd;
> + uint16_t nb_mbufs;
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> + if (tx_port_id >= RTE_MAX_ETHPORTS ||
> + tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> + RTE_ETHDEV_LOG(ERR,
> + "Invalid tx_port_id=%u or tx_queue_id=%u\n",
> + tx_port_id, tx_queue_id);
> + return 0;
> + }
> +#endif
> +
> + /* fetch pointer to queue data */
> + p = &rte_eth_fp_ops[tx_port_id];
> + qd = p->txq.data[tx_queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
> +
> + if (qd == NULL) {
> + RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
> + tx_queue_id, tx_port_id);
> + return 0;
> + }
> +#endif
> + if (p->recycle_tx_mbufs_reuse == NULL)
> + return 0;
> +
> + /* Copy used *rte_mbuf* buffer pointers from Tx mbuf ring
> + * into Rx mbuf ring.
> + */
> + nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
> +
> + /* If no recycling mbufs, return 0. */
> + if (nb_mbufs == 0)
> + return 0;
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> + if (rx_port_id >= RTE_MAX_ETHPORTS ||
> + rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> + RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
> + rx_port_id, rx_queue_id);
> + return 0;
> + }
> +#endif
> +
> + /* fetch pointer to queue data */
> + p = &rte_eth_fp_ops[rx_port_id];
> + qd = p->rxq.data[rx_queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
> +
> + if (qd == NULL) {
> + RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
> + rx_queue_id, rx_port_id);
> + return 0;
> + }
> +#endif
> +
> + if (p->recycle_rx_descriptors_refill == NULL)
> + return 0;
> +
> + /* Replenish the Rx descriptors with the recycling
> + * into Rx mbuf ring.
> + */
> + p->recycle_rx_descriptors_refill(qd, nb_mbufs);
> +
> + return nb_mbufs;
> +}
> +
>  /**
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
> diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> index dcf8adab92..a2e6ea6b6c 100644
> --- a/lib/ethdev/rte_ethdev_core.h
> +++ b/lib/ethdev/rte_ethdev_core.h
> @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
>  /** @internal Check the status of a Tx descriptor */
>  typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> 
> +/** @internal Copy used mbufs from Tx mbuf ring into Rx mbuf ring */
> +typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
> + struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> +
> +/** @internal Refill Rx descriptors with the recycling mbufs */
> +typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> +
>  /**
>   * @internal
>   * Structure used to hold opaque pointers to internal ethdev Rx/Tx
> @@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
>          eth_rx_queue_count_t rx_queue_count;
>          /** Check the status of a Rx descriptor. */
>          eth_rx_descriptor_status_t rx_descriptor_status;
> + /** Refill Rx descriptors with the recycling mbufs. */
> + eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> I am afraid we can't put new fields here without ABI breakage.
> 
> Agree
> 
> It has to be below rxq.
> Now thinking about current layout probably not the best one,
> and when introducing this struct, I should probably put rxq either
> on the top of the struct, or on the next cache line.
> But such change is not possible right now anyway.
> Same story for txq.
> 
> Thus we should rearrange the structure like below:
>
> struct rte_eth_fp_ops {
> 	struct rte_ethdev_qdata rxq;
> 	eth_rx_burst_t rx_pkt_burst;
> 	eth_rx_queue_count_t rx_queue_count;
> 	eth_rx_descriptor_status_t rx_descriptor_status;
> 	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> 	uintptr_t reserved1[2];
> };

Yes, I think such a layout will be better.
The only problem here: we have to wait for 23.11 for that.

> 
> 
>          /** Rx queues data. */
>          struct rte_ethdev_qdata rxq;
> - uintptr_t reserved1[3];
> + uintptr_t reserved1[2];
>          /**@}*/
> 
>          /**@{*/
> @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
>          eth_tx_prep_t tx_pkt_prepare;
>          /** Check the status of a Tx descriptor. */
>          eth_tx_descriptor_status_t tx_descriptor_status;
> + /** Copy used mbufs from Tx mbuf ring into Rx. */
> + eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
>          /** Tx queues data. */
>          struct rte_ethdev_qdata txq;
> - uintptr_t reserved2[3];
> + uintptr_t reserved2[2];
>          /**@}*/
> 
>  } __rte_cache_aligned;
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 357d1a88c0..45c417f6bd 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -299,6 +299,10 @@ EXPERIMENTAL {
>          rte_flow_action_handle_query_update;
>          rte_flow_async_action_handle_query_update;
>          rte_flow_async_create_by_index;
> +
> + # added in 23.07
> + rte_eth_recycle_mbufs;
> + rte_eth_recycle_rx_queue_info_get;
>  };
> 
>  INTERNAL {
> --
> 2.25.1
>
  
Feifei Wang June 6, 2023, 7:31 a.m. UTC | #5
[...]
> > Probably I am missing something, but why it is not possible to do something
> > like that:
> >
> > rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> > tx_queue_id=M, ...); ....
> > rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> > tx_queue_id=K, ...);
> >
> > I.E. feed rx queue from 2 tx queues?
> >
> > Two problems for this:
> > 1. If we have 2 tx queues for rx, the thread should make the extra
> > judgement to decide which one to choose in the driver layer.
> 
> Not sure, why on the driver layer?
> The example I gave above - decision is made on application layer.
> Lets say first call didn't free enough mbufs, so app decided to use second txq
> for rearm.
[Feifei] I think the current mbuf recycle mode can support this usage. For example:

n = rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
if (n < planned_number)
	rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);

Thus, if users want, they can do it like this.
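
Wrapped up as a self-contained helper (the helper name and the planned_number
threshold are illustrative, not from the patch), that fallback pattern would
read:

static uint16_t
recycle_from_two_txqs(uint16_t rx_port, uint16_t rx_queue,
		uint16_t tx_port, uint16_t txq_m, uint16_t txq_k,
		uint16_t planned_number,
		struct rte_eth_recycle_rxq_info *rxq_info)
{
	uint16_t n;

	/* Try the first Tx queue, and top up from the second one only if
	 * the first did not yield enough used mbufs. Both calls run on
	 * the same thread, so no locking is needed.
	 */
	n = rte_eth_recycle_mbufs(rx_port, rx_queue, tx_port, txq_m, rxq_info);
	if (n < planned_number)
		n += rte_eth_recycle_mbufs(rx_port, rx_queue, tx_port, txq_k,
				rxq_info);
	return n;
}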

> 
> > On the other hand, current mechanism can support users to switch 1 txq
> > to another timely in the application layer. If user want to choose
> > another txq, he just need to change the txq_queue_id parameter in the API.
> > 2. If you want one rxq to support two txq at the same time, this needs
> > to add spinlock on guard variable to avoid multi-thread conflict.
> > Spinlock will decrease the data-path performance greatly.  Thus, we do
> > not consider
> > 1 rxq mapping multiple txqs here.
> 
> I am talking about situation when one thread controls 2 tx queues.
> 
> > + *
> > + * @param rx_port_id
> > + * Port identifying the receive side.
> > + * @param rx_queue_id
> > + * The index of the receive queue identifying the receive side.
> > + * The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > + * to rte_eth_dev_configure().
> > + * @param tx_port_id
> > + * Port identifying the transmit side.
> > + * @param tx_queue_id
> > + * The index of the transmit queue identifying the transmit side.
> > + * The value must be in the range [0, nb_tx_queue - 1] previously supplied
> > + * to rte_eth_dev_configure().
> > + * @param recycle_rxq_info
> > + * A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
> > + * the information of the Rx queue mbuf ring.
> > + * @return
> > + * The number of recycling mbufs.
> > + */
> > +__rte_experimental
> > +static inline uint16_t
> > +rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
> > +	uint16_t tx_port_id, uint16_t tx_queue_id,
> > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> > +{
> > +	struct rte_eth_fp_ops *p;
> > +	void *qd;
> > +	uint16_t nb_mbufs;
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_TX
> > +	if (tx_port_id >= RTE_MAX_ETHPORTS ||
> > +			tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			"Invalid tx_port_id=%u or tx_queue_id=%u\n",
> > +			tx_port_id, tx_queue_id);
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	/* fetch pointer to queue data */
> > +	p = &rte_eth_fp_ops[tx_port_id];
> > +	qd = p->txq.data[tx_queue_id];
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_TX
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
> > +
> > +	if (qd == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
> > +			tx_queue_id, tx_port_id);
> > +		return 0;
> > +	}
> > +#endif
> > +	if (p->recycle_tx_mbufs_reuse == NULL)
> > +		return 0;
> > +
> > +	/* Copy used *rte_mbuf* buffer pointers from Tx mbuf ring
> > +	 * into Rx mbuf ring.
> > +	 */
> > +	nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
> > +
> > +	/* If no recycling mbufs, return 0. */
> > +	if (nb_mbufs == 0)
> > +		return 0;
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	if (rx_port_id >= RTE_MAX_ETHPORTS ||
> > +			rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
> > +			rx_port_id, rx_queue_id);
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	/* fetch pointer to queue data */
> > +	p = &rte_eth_fp_ops[rx_port_id];
> > +	qd = p->rxq.data[rx_queue_id];
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
> > +
> > +	if (qd == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
> > +			rx_queue_id, rx_port_id);
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	if (p->recycle_rx_descriptors_refill == NULL)
> > +		return 0;
> > +
> > +	/* Replenish the Rx descriptors with the recycling
> > +	 * into Rx mbuf ring.
> > +	 */
> > +	p->recycle_rx_descriptors_refill(qd, nb_mbufs);
> > +
> > +	return nb_mbufs;
> > +}
> > +
> >  /**
> >   * @warning
> >   * @b EXPERIMENTAL: this API may change without prior notice
> > diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> > index dcf8adab92..a2e6ea6b6c 100644
> > --- a/lib/ethdev/rte_ethdev_core.h
> > +++ b/lib/ethdev/rte_ethdev_core.h
> > @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
> >  /** @internal Check the status of a Tx descriptor */
> >  typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> >
> > +/** @internal Copy used mbufs from Tx mbuf ring into Rx mbuf ring */
> > +typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
> > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > +
> > +/** @internal Refill Rx descriptors with the recycling mbufs */
> > +typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> > +
> >  /**
> >   * @internal
> >   * Structure used to hold opaque pointers to internal ethdev Rx/Tx
> > @@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
> >          eth_rx_queue_count_t rx_queue_count;
> >          /** Check the status of a Rx descriptor. */
> >          eth_rx_descriptor_status_t rx_descriptor_status;
> > + /** Refill Rx descriptors with the recycling mbufs. */
> > + eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> > I am afraid we can't put new fields here without ABI breakage.
> >
> > Agree
> >
> > It has to be below rxq.
> > Now thinking about current layout probably not the best one, and when
> > introducing this struct, I should probably put rxq either on the top
> > of the struct, or on the next cache line.
> > But such change is not possible right now anyway.
> > Same story for txq.
> >
> > Thus we should rearrange the structure like below:
> >
> > struct rte_eth_fp_ops {
> > 	struct rte_ethdev_qdata rxq;
> > 	eth_rx_burst_t rx_pkt_burst;
> > 	eth_rx_queue_count_t rx_queue_count;
> > 	eth_rx_descriptor_status_t rx_descriptor_status;
> > 	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> > 	uintptr_t reserved1[2];
> > };
> 
> Yes, I think such layout will be better.
> The only problem here - we have to wait for 23.11 for that.
> 
Ok. Even without this change, maybe we still need to wait, because mbufs_recycle has
other ABI breakage, such as the change to 'struct rte_eth_dev'.
> > [...]
  
Konstantin Ananyev June 6, 2023, 8:34 a.m. UTC | #6
> 
> [...]
> > > Probably I am missing something, but why it is not possible to do something
> > like that:
> > >
> > > rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> > > tx_queue_id=M, ...); ....
> > > rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> > > tx_queue_id=K, ...);
> > >
> > > I.E. feed rx queue from 2 tx queues?
> > >
> > > Two problems for this:
> > > 1. If we have 2 tx queues for rx, the thread should make the extra
> > > judgement to decide which one to choose in the driver layer.
> >
> > Not sure, why on the driver layer?
> > The example I gave above - decision is made on application layer.
> > Lets say first call didn't free enough mbufs, so app decided to use second txq
> > for rearm.
> [Feifei] I think currently mbuf recycle mode can support this usage. For example:
> n =  rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
> if (n < planned_number)
> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);
> 
> Thus, if users want, they can do like this.

Yes, that was my thought; that's why I was surprised that in the comments we have:
" Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
* between the receive queue and transmit queue. Do not pair one receive queue with
 * multiple transmit queues or pair one transmit queue with multiple receive queues,
 * in order to avoid memory error rewriting."
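
For clarity, the pattern discussed above is just the following (a minimal
sketch; all port/queue ids and nb_planned are illustrative):

	struct rte_eth_recycle_rxq_info recycle_rxq_info;
	uint16_t n;

	/* Once, after queue setup: retrieve the Rx mbuf ring information. */
	rte_eth_recycle_rx_queue_info_get(rx_port_id, rx_queue_id,
			&recycle_rxq_info);

	n = rte_eth_recycle_mbufs(rx_port_id, rx_queue_id,
			tx_port_id, tx_queue_id_m, &recycle_rxq_info);
	if (n < nb_planned)
		/* txq M freed too few mbufs, top up from txq K */
		n += rte_eth_recycle_mbufs(rx_port_id, rx_queue_id,
				tx_port_id, tx_queue_id_k, &recycle_rxq_info);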

> 
> >
> > > On the other hand, current mechanism can support users to switch 1 txq
> > > to another timely in the application layer. If user want to choose
> > > another txq, he just need to change the txq_queue_id parameter in the API.
> > > 2. If you want one rxq to support two txq at the same time, this needs
> > > to add spinlock on guard variable to avoid multi-thread conflict.
> > > Spinlock will decrease the data-path performance greatly.  Thus, we do
> > > not consider
> > > 1 rxq mapping multiple txqs here.
> >
> > I am talking about situation when one thread controls 2 tx queues.
> >
> > > [...]
> > >  /**
> > >   * @internal
> > >   * Structure used to hold opaque pointers to internal ethdev Rx/Tx @@
> > > -90,9 +97,11 @@ struct rte_eth_fp_ops {
> > >          eth_rx_queue_count_t rx_queue_count;
> > >          /** Check the status of a Rx descriptor. */
> > >          eth_rx_descriptor_status_t rx_descriptor_status;
> > > + /** Refill Rx descriptors with the recycling mbufs. */
> > > + eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> > > I am afraid we can't put new fields here without ABI breakage.
> > >
> > > Agree
> > >
> > > It has to be below rxq.
> > > Now thinking about current layout probably not the best one, and when
> > > introducing this struct, I should probably put rxq either on the top
> > > of the struct, or on the next cache line.
> > > But such change is not possible right now anyway.
> > > Same story for txq.
> > >
> > > Thus we should rearrange the structure like below:
> > > struct rte_eth_fp_ops {
> > >     struct rte_ethdev_qdata rxq;
> > >          eth_rx_burst_t rx_pkt_burst;
> > >          eth_rx_queue_count_t rx_queue_count;
> > >          eth_rx_descriptor_status_t rx_descriptor_status;
> > >        eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
> > >               uintptr_t reserved1[2];
> > > }
> >
> > Yes, I think such layout will be better.
> > The only problem here - we have to wait for 23.11 for that.
> >
> Ok, even without this change we may still need to wait, because mbufs_recycle
> has other ABI breakage, such as the change to 'struct rte_eth_dev'.

Ok by me.

> > > [...]
  
Ferruh Yigit June 7, 2023, midnight UTC | #7
On 6/6/2023 9:34 AM, Konstantin Ananyev wrote:
> 
> 
>>
>> [...]
>>>> Probably I am missing something, but why it is not possible to do something
>>> like that:
>>>>
>>>> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
>>>> tx_queue_id=M, ...); ....
>>>> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
>>>> tx_queue_id=K, ...);
>>>>
>>>> I.E. feed rx queue from 2 tx queues?
>>>>
>>>> Two problems for this:
>>>> 1. If we have 2 tx queues for rx, the thread should make the extra
>>>> judgement to decide which one to choose in the driver layer.
>>>
>>> Not sure, why on the driver layer?
>>> The example I gave above - decision is made on application layer.
>>> Lets say first call didn't free enough mbufs, so app decided to use second txq
>>> for rearm.
>> [Feifei] I think currently mbuf recycle mode can support this usage. For example:
>> n =  rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=M, ...);
>> if (n < planned_number)
>> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N, tx_queue_id=K, ...);
>>
>> Thus, if users want, they can do like this.
> 
> Yes, that was my thought; that's why I was surprised that in the comments we have:
> " Currently, the rte_eth_recycle_mbufs() function can only support one-time pairing
> * between the receive queue and transmit queue. Do not pair one receive queue with
>  * multiple transmit queues or pair one transmit queue with multiple receive queues,
>  * in order to avoid memory error rewriting."
> 

I guess that is from previous versions of the set; it would be good to
revisit the limitations/restrictions in the latest version.


>> [...]
  
Feifei Wang June 12, 2023, 3:25 a.m. UTC | #8
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Wednesday, June 7, 2023 8:01 AM
> To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Feifei Wang
> <Feifei.Wang2@arm.com>; Константин Ананьев
> <konstantin.v.ananyev@yandex.ru>; thomas@monjalon.net; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: Re: [PATCH v6 1/4] ethdev: add API for mbufs recycle mode
> 
> On 6/6/2023 9:34 AM, Konstantin Ananyev wrote:
> >
> >
> >>
> >> [...]
> >>>> Probably I am missing something, but why it is not possible to do
> >>>> something
> >>> like that:
> >>>>
> >>>> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> >>>> tx_queue_id=M, ...); ....
> >>>> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> >>>> tx_queue_id=K, ...);
> >>>>
> >>>> I.E. feed rx queue from 2 tx queues?
> >>>>
> >>>> Two problems for this:
> >>>> 1. If we have 2 tx queues for rx, the thread should make the extra
> >>>> judgement to decide which one to choose in the driver layer.
> >>>
> >>> Not sure, why on the driver layer?
> >>> The example I gave above - decision is made on application layer.
> >>> Lets say first call didn't free enough mbufs, so app decided to use
> >>> second txq for rearm.
> >> [Feifei] I think currently mbuf recycle mode can support this usage. For
> >> example:
> >> n =  rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> >> tx_queue_id=M, ...); if (n < planned_number)
> >> rte_eth_recycle_mbufs(rx_port_id=X, rx_queue_id=Y, tx_port_id=N,
> >> tx_queue_id=K, ...);
> >>
> >> Thus, if users want, they can do like this.
> >
> > Yes, that was my thought; that's why I was surprised that in the comments
> > we have:
> > " Currently, the rte_eth_recycle_mbufs() function can only support
> > one-time pairing
> > * between the receive queue and transmit queue. Do not pair one
> > receive queue with
> >  * multiple transmit queues or pair one transmit queue with multiple
> > receive queues,
> >  * in order to avoid memory error rewriting."
> >
> 
> I guess that is from previous versions of the set; it would be good to
> revisit the limitations/restrictions in the latest version.

[Feifei] Sorry, this is due to ambiguous wording in the function description.
I wanted to say that 'mbufs_recycle' cannot support multiple threads.

I will reword the description and state explicitly that users can switch
from one txq to another within a single thread.
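For example, something like this would then be valid (a sketch only; the ids
and the selection flag are illustrative, and the single lcore owns both tx
queues):

	while (!quit) {
		/* Re-pair the rxq with either owned txq between calls;
		 * no locking is needed since only this thread touches
		 * these queues.
		 */
		uint16_t txq = use_txq_k ? tx_queue_id_k : tx_queue_id_m;

		rte_eth_recycle_mbufs(rx_port_id, rx_queue_id,
				tx_port_id, txq, &recycle_rxq_info);
		/* ... rte_eth_rx_burst() / rte_eth_tx_burst() ... */
	}
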
Thanks for the comments. 

> [...]
  

Patch

diff --git a/doc/guides/rel_notes/release_23_07.rst b/doc/guides/rel_notes/release_23_07.rst
index a9b1293689..f279036cb9 100644
--- a/doc/guides/rel_notes/release_23_07.rst
+++ b/doc/guides/rel_notes/release_23_07.rst
@@ -55,6 +55,13 @@  New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Added mbufs recycling support.**
+  Added ``rte_eth_recycle_rx_queue_info_get`` and ``rte_eth_recycle_mbufs``
+  APIs which allow the user to copy used mbufs from the Tx mbuf ring
+  into the Rx mbuf ring. This feature also supports the case where the Rx
+  Ethernet device is different from the Tx Ethernet device, via the respective
+  driver callback functions used by ``rte_eth_recycle_mbufs``.
+
 
 Removed Items
 -------------
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 2c9d615fb5..c6723d5277 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -59,6 +59,10 @@  struct rte_eth_dev {
 	eth_rx_descriptor_status_t rx_descriptor_status;
 	/** Check the status of a Tx descriptor */
 	eth_tx_descriptor_status_t tx_descriptor_status;
+	/** Pointer to PMD transmit mbufs reuse function */
+	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
+	/** Pointer to PMD receive descriptors refill function */
+	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
 
 	/**
 	 * Device data that is shared between primary and secondary processes
@@ -504,6 +508,10 @@  typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
 typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
 	uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
 
+typedef void (*eth_recycle_rxq_info_get_t)(struct rte_eth_dev *dev,
+	uint16_t rx_queue_id,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
 typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
 	uint16_t queue_id, struct rte_eth_burst_mode *mode);
 
@@ -1247,6 +1255,8 @@  struct eth_dev_ops {
 	eth_rxq_info_get_t         rxq_info_get;
 	/** Retrieve Tx queue information */
 	eth_txq_info_get_t         txq_info_get;
+	/** Retrieve mbufs recycle Rx queue information */
+	eth_recycle_rxq_info_get_t recycle_rxq_info_get;
 	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst mode */
 	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst mode */
 	eth_fw_version_get_t       fw_version_get; /**< Get firmware version */
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 14ec8c6ccf..f8ab64f195 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -277,6 +277,8 @@  eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
 	fpo->rx_queue_count = dev->rx_queue_count;
 	fpo->rx_descriptor_status = dev->rx_descriptor_status;
 	fpo->tx_descriptor_status = dev->tx_descriptor_status;
+	fpo->recycle_tx_mbufs_reuse = dev->recycle_tx_mbufs_reuse;
+	fpo->recycle_rx_descriptors_refill = dev->recycle_rx_descriptors_refill;
 
 	fpo->rxq.data = dev->data->rx_queues;
 	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 4d03255683..7c27dcfea4 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5784,6 +5784,37 @@  rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	return 0;
 }
 
+int
+rte_eth_recycle_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (queue_id >= dev->data->nb_rx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->rx_queues == NULL ||
+			dev->data->rx_queues[queue_id] == NULL) {
+		RTE_ETHDEV_LOG(ERR,
+			   "Rx queue %"PRIu16" of device with port_id=%"
+			   PRIu16" has not been setup\n",
+			   queue_id, port_id);
+		return -EINVAL;
+	}
+
+	if (*dev->dev_ops->recycle_rxq_info_get == NULL)
+		return -ENOTSUP;
+
+	dev->dev_ops->recycle_rxq_info_get(dev, queue_id, recycle_rxq_info);
+
+	return 0;
+}
+
 int
 rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
 			  struct rte_eth_burst_mode *mode)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 99fe9e238b..7434aa2483 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1820,6 +1820,30 @@  struct rte_eth_txq_info {
 	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
 } __rte_cache_min_aligned;
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * Ethernet device Rx queue information structure for recycling mbufs.
+ * Used to retrieve Rx queue information when the Tx queue is reusing mbufs
+ * and moving them into the Rx mbuf ring.
+ */
+struct rte_eth_recycle_rxq_info {
+	struct rte_mbuf **mbuf_ring; /**< mbuf ring of Rx queue. */
+	struct rte_mempool *mp;     /**< mempool of Rx queue. */
+	uint16_t *refill_head;      /**< head of Rx queue refilling mbufs. */
+	uint16_t *receive_tail;     /**< tail of Rx queue receiving pkts. */
+	uint16_t mbuf_ring_size;     /**< configured size of the mbuf ring. */
+	/**
+	 * Requirement on the mbuf refill batch size for the Rx mbuf ring.
+	 * For some PMDs, the number of mbufs refilled into the Rx mbuf ring
+	 * should be aligned with the mbuf ring size, in order to simplify
+	 * ring wrap-around handling.
+	 * Value 0 means that the PMD has no such requirement.
+	 */
+	uint16_t refill_requirement;
+} __rte_cache_min_aligned;
+
 /* Generic Burst mode flag definition, values can be ORed. */
 
 /**
@@ -4809,6 +4833,31 @@  int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Retrieve information about a given port's Rx queue for recycling mbufs.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The Rx queue on the Ethernet device for which information
+ *   will be retrieved.
+ * @param recycle_rxq_info
+ *   A pointer to a structure of type *rte_eth_recycle_rxq_info* to be filled.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENODEV:  If *port_id* is invalid.
+ *   - -ENOTSUP: routine is not supported by the device PMD.
+ *   - -EINVAL:  The queue_id is out of range.
+ */
+__rte_experimental
+int rte_eth_recycle_rx_queue_info_get(uint16_t port_id,
+		uint16_t queue_id,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
 /**
  * Retrieve information about the Rx packet burst mode.
  *
@@ -6483,6 +6532,139 @@  rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
 	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Recycle used mbufs from a transmit queue of an Ethernet device, and move
+ * these mbufs into a mbuf ring for a receive queue of an Ethernet device.
+ * This can bypass the mempool path and save CPU cycles.
+ *
+ * The rte_eth_recycle_mbufs() function is meant to be called in a loop with
+ * rte_eth_rx_burst() and rte_eth_tx_burst(), freeing used Tx mbufs and
+ * replenishing Rx descriptors. The number of recycled mbufs depends on the
+ * needs of the Rx mbuf ring, limited by the used mbufs in the Tx mbuf ring.
+ *
+ * For each recycling mbufs, the rte_eth_recycle_mbufs() function performs the
+ * following operations:
+ *
+ * - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf ring.
+ *
+ * - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed
+ *   from the Tx mbuf ring.
+ *
+ * This function splits the Rx and Tx paths with different callback functions:
+ * recycle_tx_mbufs_reuse for the Tx driver and recycle_rx_descriptors_refill
+ * for the Rx driver. Hence rte_eth_recycle_mbufs() also supports the case
+ * where the Rx Ethernet device is different from the Tx Ethernet device.
+ *
+ * It is the responsibility of users to select the Rx/Tx queue pair to recycle
+ * mbufs. Before calling this function, users must call
+ * rte_eth_recycle_rx_queue_info_get() to retrieve the selected Rx queue
+ * information.
+ * @see rte_eth_recycle_rx_queue_info_get, struct rte_eth_recycle_rxq_info
+ *
+ * Currently, the rte_eth_recycle_mbufs() function is not multi-thread safe.
+ * Do not access one queue pairing from multiple threads concurrently; within
+ * a single thread, the paired transmit queue may be changed between calls.
+ *
+ * @param rx_port_id
+ *   Port identifying the receive side.
+ * @param rx_queue_id
+ *   The index of the receive queue identifying the receive side.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_port_id
+ *   Port identifying the transmit side.
+ * @param tx_queue_id
+ *   The index of the transmit queue identifying the transmit side.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param recycle_rxq_info
+ *   A pointer to a structure of type *rte_eth_recycle_rxq_info* which contains
+ *   the information of the Rx queue mbuf ring.
+ * @return
+ *   The number of recycling mbufs.
+ */
+__rte_experimental
+static inline uint16_t
+rte_eth_recycle_mbufs(uint16_t rx_port_id, uint16_t rx_queue_id,
+		uint16_t tx_port_id, uint16_t tx_queue_id,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct rte_eth_fp_ops *p;
+	void *qd;
+	uint16_t nb_mbufs;
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+	if (tx_port_id >= RTE_MAX_ETHPORTS ||
+			tx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+		RTE_ETHDEV_LOG(ERR,
+				"Invalid tx_port_id=%u or tx_queue_id=%u\n",
+				tx_port_id, tx_queue_id);
+		return 0;
+	}
+#endif
+
+	/* fetch pointer to queue data */
+	p = &rte_eth_fp_ops[tx_port_id];
+	qd = p->txq.data[tx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(tx_port_id, 0);
+
+	if (qd == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
+				tx_queue_id, tx_port_id);
+		return 0;
+	}
+#endif
+	if (p->recycle_tx_mbufs_reuse == NULL)
+		return 0;
+
+	/* Copy used *rte_mbuf* buffer pointers from Tx mbuf ring
+	 * into Rx mbuf ring.
+	 */
+	nb_mbufs = p->recycle_tx_mbufs_reuse(qd, recycle_rxq_info);
+
+	/* If no recycling mbufs, return 0. */
+	if (nb_mbufs == 0)
+		return 0;
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	if (rx_port_id >= RTE_MAX_ETHPORTS ||
+			rx_queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+		RTE_ETHDEV_LOG(ERR, "Invalid rx_port_id=%u or rx_queue_id=%u\n",
+				rx_port_id, rx_queue_id);
+		return 0;
+	}
+#endif
+
+	/* fetch pointer to queue data */
+	p = &rte_eth_fp_ops[rx_port_id];
+	qd = p->rxq.data[rx_queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(rx_port_id, 0);
+
+	if (qd == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
+				rx_queue_id, rx_port_id);
+		return 0;
+	}
+#endif
+
+	if (p->recycle_rx_descriptors_refill == NULL)
+		return 0;
+
+	/* Replenish the Rx descriptors with the recycled mbufs
+	 * copied into the Rx mbuf ring.
+	 */
+	p->recycle_rx_descriptors_refill(qd, nb_mbufs);
+
+	return nb_mbufs;
+}
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index dcf8adab92..a2e6ea6b6c 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -56,6 +56,13 @@  typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
 /** @internal Check the status of a Tx descriptor */
 typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
 
+/** @internal Copy used mbufs from Tx mbuf ring into Rx mbuf ring */
+typedef uint16_t (*eth_recycle_tx_mbufs_reuse_t)(void *txq,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+
+/** @internal Refill Rx descriptors with the recycling mbufs */
+typedef void (*eth_recycle_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
+
 /**
  * @internal
  * Structure used to hold opaque pointers to internal ethdev Rx/Tx
@@ -90,9 +97,11 @@  struct rte_eth_fp_ops {
 	eth_rx_queue_count_t rx_queue_count;
 	/** Check the status of a Rx descriptor. */
 	eth_rx_descriptor_status_t rx_descriptor_status;
+	/** Refill Rx descriptors with the recycling mbufs. */
+	eth_recycle_rx_descriptors_refill_t recycle_rx_descriptors_refill;
 	/** Rx queues data. */
 	struct rte_ethdev_qdata rxq;
-	uintptr_t reserved1[3];
+	uintptr_t reserved1[2];
 	/**@}*/
 
 	/**@{*/
@@ -106,9 +115,11 @@  struct rte_eth_fp_ops {
 	eth_tx_prep_t tx_pkt_prepare;
 	/** Check the status of a Tx descriptor. */
 	eth_tx_descriptor_status_t tx_descriptor_status;
+	/** Copy used mbufs from Tx mbuf ring into Rx mbuf ring. */
+	eth_recycle_tx_mbufs_reuse_t recycle_tx_mbufs_reuse;
 	/** Tx queues data. */
 	struct rte_ethdev_qdata txq;
-	uintptr_t reserved2[3];
+	uintptr_t reserved2[2];
 	/**@}*/
 
 } __rte_cache_aligned;
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 357d1a88c0..45c417f6bd 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -299,6 +299,10 @@  EXPERIMENTAL {
 	rte_flow_action_handle_query_update;
 	rte_flow_async_action_handle_query_update;
 	rte_flow_async_create_by_index;
+
+	# added in 23.07
+	rte_eth_recycle_mbufs;
+	rte_eth_recycle_rx_queue_info_get;
 };
 
 INTERNAL {
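
For reference, the intended use of the two new APIs together in a simple
forwarding loop looks roughly as below. This is a sketch only: error handling
is omitted, and the port/queue ids, burst size and quit flag are illustrative.

	struct rte_eth_recycle_rxq_info recycle_rxq_info;
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, nb_tx;

	/* Once, after the queues are set up: retrieve the Rx mbuf ring info. */
	rte_eth_recycle_rx_queue_info_get(rx_port_id, rx_queue_id,
			&recycle_rxq_info);

	while (!quit) {
		/* Move used mbufs from the Tx mbuf ring straight into the
		 * Rx mbuf ring, bypassing the mempool put/get path.
		 */
		rte_eth_recycle_mbufs(rx_port_id, rx_queue_id,
				tx_port_id, tx_queue_id, &recycle_rxq_info);

		nb_rx = rte_eth_rx_burst(rx_port_id, rx_queue_id, pkts, 32);
		nb_tx = rte_eth_tx_burst(tx_port_id, tx_queue_id,
				pkts, nb_rx);

		/* Free anything the Tx queue could not accept. */
		while (nb_tx < nb_rx)
			rte_pktmbuf_free(pkts[nb_tx++]);
	}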