[v2,1/3] ethdev: add API for direct rearm mode

Message ID 20220927024756.947272-2-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series Direct re-arming of buffers on receive side |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Feifei Wang Sept. 27, 2022, 2:47 a.m. UTC
  Add API for enabling direct rearm mode and for mapping RX and TX
queues. Currently, the API supports 1:1(txq : rxq) mapping.

Furthermore, to avoid Rx load Tx data directly, add API called
'rte_eth_txq_data_get' to get Tx sw_ring and its information.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/ethdev/ethdev_driver.h   |  9 ++++
 lib/ethdev/ethdev_private.c  |  1 +
 lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
 lib/ethdev/rte_ethdev.h      | 95 ++++++++++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev_core.h |  5 ++
 lib/ethdev/version.map       |  4 ++
 6 files changed, 151 insertions(+)
  

Comments

Konstantin Ananyev Oct. 3, 2022, 8:58 a.m. UTC | #1
27/09/2022 03:47, Feifei Wang пишет:
> Add API for enabling direct rearm mode and for mapping RX and TX
> queues. Currently, the API supports 1:1(txq : rxq) mapping.
> 
> Furthermore, to avoid Rx load Tx data directly, add API called
> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   lib/ethdev/ethdev_driver.h   |  9 ++++
>   lib/ethdev/ethdev_private.c  |  1 +
>   lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
>   lib/ethdev/rte_ethdev.h      | 95 ++++++++++++++++++++++++++++++++++++
>   lib/ethdev/rte_ethdev_core.h |  5 ++
>   lib/ethdev/version.map       |  4 ++
>   6 files changed, 151 insertions(+)
> 
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 47a55a419e..14f52907c1 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -58,6 +58,8 @@ struct rte_eth_dev {
>   	eth_rx_descriptor_status_t rx_descriptor_status;
>   	/** Check the status of a Tx descriptor */
>   	eth_tx_descriptor_status_t tx_descriptor_status;
> +	/**  Use Tx mbufs for Rx to rearm */
> +	eth_rx_direct_rearm_t rx_direct_rearm;
>   
>   	/**
>   	 * Device data that is shared between primary and secondary processes
> @@ -486,6 +488,11 @@ typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>   typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
>   				    uint16_t rx_queue_id);
>   
> +/**< @internal Get Tx information of a transmit queue of an Ethernet device. */
> +typedef void (*eth_txq_data_get_t)(struct rte_eth_dev *dev,
> +				      uint16_t tx_queue_id,
> +				      struct rte_eth_txq_data *txq_data);
> +
>   /** @internal Release memory resources allocated by given Rx/Tx queue. */
>   typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
>   				    uint16_t queue_id);
> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
>   	eth_rxq_info_get_t         rxq_info_get;
>   	/** Retrieve Tx queue information */
>   	eth_txq_info_get_t         txq_info_get;
> +	/** Get the address where Tx data is stored */
> +	eth_txq_data_get_t         txq_data_get;
>   	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst mode */
>   	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst mode */
>   	eth_fw_version_get_t       fw_version_get; /**< Get firmware version */
> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> index 48090c879a..bfe16c7d77 100644
> --- a/lib/ethdev/ethdev_private.c
> +++ b/lib/ethdev/ethdev_private.c
> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
>   	fpo->rx_queue_count = dev->rx_queue_count;
>   	fpo->rx_descriptor_status = dev->rx_descriptor_status;
>   	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
>   
>   	fpo->rxq.data = dev->data->rx_queues;
>   	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 0c2c1088c0..0dccec2e4b 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
>   	return ret;
>   }
>   
> +int
> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> +			struct rte_eth_txq_data *txq_data)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +	dev = &rte_eth_devices[port_id];
> +
> +	if (queue_id >= dev->data->nb_tx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n", queue_id);
> +		return -EINVAL;
> +	}
> +
> +	if (txq_data == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx queue %u data to NULL\n",
> +			port_id, queue_id);
> +		return -EINVAL;
> +	}
> +
> +	if (dev->data->tx_queues == NULL ||
> +			dev->data->tx_queues[queue_id] == NULL) {
> +		RTE_ETHDEV_LOG(ERR,
> +			   "Tx queue %"PRIu16" of device with port_id=%"
> +			   PRIu16" has not been setup\n",
> +			   queue_id, port_id);
> +		return -EINVAL;
> +	}
> +
> +	if (*dev->dev_ops->txq_data_get == NULL)
> +		return -ENOTSUP;
> +
> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
> +
> +	return 0;
> +}
> +
>   static int
>   rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>   			     uint16_t n_seg, uint32_t *mbp_buf_size,
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index 2e783536c1..daf7f05d62 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
>   	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
>   } __rte_cache_min_aligned;
>   
> +/**
> + * @internal
> + * Structure used to hold pointers to internal ethdev Tx data.
> + * The main purpose is to load and store Tx queue data in direct rearm mode.
> + */
> +struct rte_eth_txq_data {
> +	uint64_t *offloads;
> +	void *tx_sw_ring;
> +	volatile void *tx_ring;
> +	uint16_t *tx_next_dd;
> +	uint16_t *nb_tx_free;
> +	uint16_t nb_tx_desc;
> +	uint16_t tx_rs_thresh;
> +	uint16_t tx_free_thresh;
> +} __rte_cache_min_aligned;
> +

first of all it is not clear why this struct has to be in public header,
why it can't be in on of ethdev 'private' headers.
Second it looks like a snippet from private txq fields for
some Intel (and alike) PMDs (i40e, ice, etc.).
How it supposed to to be universal and be applicable
for any PMD that decides to implement this new API?


>   /* Generic Burst mode flag definition, values can be ORed. */
>   
>   /**
> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
>   int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
>   		const struct rte_eth_rxtx_callback *user_cb);
>   
> +/**
> + * Get the address which Tx data is stored.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The Tx queue on the Ethernet device for which information
> + *   will be retrieved.
> + * @param txq_data
> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> + *
> + * @return
> + *   - 0: Success
> + *   - -ENODEV:  If *port_id* is invalid.
> + *   - -ENOTSUP: routine is not supported by the device PMD.
> + *   - -EINVAL:  The queue_id is out of range.
> + */
> +__rte_experimental
> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> +		struct rte_eth_txq_data *txq_data);
> +
>   /**
>    * Retrieve information about given port's Rx queue.
>    *
> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
>   	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>   }
>   
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Put Tx buffers into Rx sw-ring and rearm descs.
> + *
> + * @param port_id
> + *   Port identifying the receive side.
> + * @param queue_id
> + *   The index of the transmit queue identifying the receive side.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param txq_data
> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> + * @return
> + *   The number of direct-rearmed buffers.
> + */
> +__rte_experimental
> +static __rte_always_inline uint16_t
> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
> +		struct rte_eth_txq_data *txq_data)
> +{
> +	uint16_t nb_rearm;
> +	struct rte_eth_fp_ops *p;
> +	void *qd;
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	if (port_id >= RTE_MAX_ETHPORTS ||
> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Invalid port_id=%u or queue_id=%u\n",
> +			port_id, queue_id);
> +		return 0;
> +	}
> +#endif
> +
> +	p = &rte_eth_fp_ops[port_id];
> +	qd = p->rxq.data[queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
> +
> +	if (qd == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
> +			queue_id, port_id);
> +		return 0;
> +	}
> +
> +	if (!p->rx_direct_rearm)

This check should be done always (unconditionally).
it is not a mandatory function for the driver (it can safely skip to 
implement it).

> +		return -ENOTSUP;

This function returns uint16_t, why signed integers here?


> +#endif
> +
> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);

So rx_direct_rearm() function knows how to extract data from TX queue?
As I understand that is possible only in one case:
rx_direct_rearm() has full knowledge and acess of txq internals, etc.
That means that rxq and txq have to belong to the same driver and device 
type.

First of all, I still think it is not the best design choice.
If we going ahead with introducing this feature, it better be as generic 
as possible.
Plus it mixes TX and RX code-paths together,
while it would be much better to to keep them independent as they
are right now.

Another thing with such approach - even for the same PMD,
both TXQ and RXQ can have different internal data format and
behavior logic (depending on port/queue configuration).
So rx_direct_rearm() function selection have to be done
based on both RXQ and TXQ config.
So instead of rte_eth_tx_queue_data_get(), you'll probably need:
eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port, 
rx_queue, tx_port, tx_queue);
Then, it will be user responsibility to store it somewhere
and call periodically:

control_path:
	...
	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
		 txport, txqueue);
data-path:
	while(...) {
		rearm_func(rxport, txport, rxqueue, txqueue);
		rte_eth_rx_burst(rxport, rxqueue, ....);
		rte_eth_tx_burst(txport, txqueue, ....);
	}
	

In that case there seems absolutely no point to introduce
struct rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
directly anyway.

Another way - make rte_eth_txq_data totally opaque and allow PMD to
store there some data that will help it to distinguish expected TXQ
format. That will allow PMD to keep rx_direct_rearm() the same for
all supported TXQ formats (it will make decision internally based
on data stored in txq_data).
Though in that case you'll probably need one more dev-op to free 
txq_data.


> +
> +	return nb_rearm;
> +}
> +
>   #ifdef __cplusplus
>   }
>   #endif
> diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> index dcf8adab92..6a328e2481 100644
> --- a/lib/ethdev/rte_ethdev_core.h
> +++ b/lib/ethdev/rte_ethdev_core.h
> @@ -56,6 +56,9 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
>   /** @internal Check the status of a Tx descriptor */
>   typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
>   
> +/** @internal Put Tx buffers into Rx sw-ring and rearm descs */
> +typedef uint16_t (*eth_rx_direct_rearm_t)(void *rxq, struct rte_eth_txq_data *txq_data);
> +
>   /**
>    * @internal
>    * Structure used to hold opaque pointers to internal ethdev Rx/Tx
> @@ -90,6 +93,8 @@ struct rte_eth_fp_ops {
>   	eth_rx_queue_count_t rx_queue_count;
>   	/** Check the status of a Rx descriptor. */
>   	eth_rx_descriptor_status_t rx_descriptor_status;
> +	/** Put Tx buffers into Rx sw-ring and rearm descs */
> +	eth_rx_direct_rearm_t rx_direct_rearm;
>   	/** Rx queues data. */
>   	struct rte_ethdev_qdata rxq;
>   	uintptr_t reserved1[3];
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 03f52fee91..69e4c376d3 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -285,6 +285,10 @@ EXPERIMENTAL {
>   	rte_mtr_color_in_protocol_priority_get;
>   	rte_mtr_color_in_protocol_set;
>   	rte_mtr_meter_vlan_table_update;
> +
> +	# added in 22.11
> +	rte_eth_tx_queue_data_get;
> +	rte_eth_rx_direct_rearm;
>   };
>   
>   INTERNAL {
  
Feifei Wang Oct. 11, 2022, 7:19 a.m. UTC | #2
> -----邮件原件-----
> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> 发送时间: Monday, October 3, 2022 4:58 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
> Ferruh Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: [PATCH v2 1/3] ethdev: add API for direct rearm mode
> 
> 27/09/2022 03:47, Feifei Wang пишет:
> > Add API for enabling direct rearm mode and for mapping RX and TX
> > queues. Currently, the API supports 1:1(txq : rxq) mapping.
> >
> > Furthermore, to avoid Rx load Tx data directly, add API called
> > 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
> >
> > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> >   lib/ethdev/ethdev_driver.h   |  9 ++++
> >   lib/ethdev/ethdev_private.c  |  1 +
> >   lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
> >   lib/ethdev/rte_ethdev.h      | 95
> ++++++++++++++++++++++++++++++++++++
> >   lib/ethdev/rte_ethdev_core.h |  5 ++
> >   lib/ethdev/version.map       |  4 ++
> >   6 files changed, 151 insertions(+)
> >
> > diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> > index 47a55a419e..14f52907c1 100644
> > --- a/lib/ethdev/ethdev_driver.h
> > +++ b/lib/ethdev/ethdev_driver.h
> > @@ -58,6 +58,8 @@ struct rte_eth_dev {
> >   	eth_rx_descriptor_status_t rx_descriptor_status;
> >   	/** Check the status of a Tx descriptor */
> >   	eth_tx_descriptor_status_t tx_descriptor_status;
> > +	/**  Use Tx mbufs for Rx to rearm */
> > +	eth_rx_direct_rearm_t rx_direct_rearm;
> >
> >   	/**
> >   	 * Device data that is shared between primary and secondary
> > processes @@ -486,6 +488,11 @@ typedef int
> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >   typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> >   				    uint16_t rx_queue_id);
> >
> > +/**< @internal Get Tx information of a transmit queue of an Ethernet
> > +device. */ typedef void (*eth_txq_data_get_t)(struct rte_eth_dev *dev,
> > +				      uint16_t tx_queue_id,
> > +				      struct rte_eth_txq_data *txq_data);
> > +
> >   /** @internal Release memory resources allocated by given Rx/Tx queue.
> */
> >   typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
> >   				    uint16_t queue_id);
> > @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
> >   	eth_rxq_info_get_t         rxq_info_get;
> >   	/** Retrieve Tx queue information */
> >   	eth_txq_info_get_t         txq_info_get;
> > +	/** Get the address where Tx data is stored */
> > +	eth_txq_data_get_t         txq_data_get;
> >   	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst
> mode */
> >   	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst
> mode */
> >   	eth_fw_version_get_t       fw_version_get; /**< Get firmware
> version */
> > diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> > index 48090c879a..bfe16c7d77 100644
> > --- a/lib/ethdev/ethdev_private.c
> > +++ b/lib/ethdev/ethdev_private.c
> > @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops
> *fpo,
> >   	fpo->rx_queue_count = dev->rx_queue_count;
> >   	fpo->rx_descriptor_status = dev->rx_descriptor_status;
> >   	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> > +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
> >
> >   	fpo->rxq.data = dev->data->rx_queues;
> >   	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> > diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> > 0c2c1088c0..0dccec2e4b 100644
> > --- a/lib/ethdev/rte_ethdev.c
> > +++ b/lib/ethdev/rte_ethdev.c
> > @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
> >   	return ret;
> >   }
> >
> > +int
> > +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> > +			struct rte_eth_txq_data *txq_data) {
> > +	struct rte_eth_dev *dev;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> > +	dev = &rte_eth_devices[port_id];
> > +
> > +	if (queue_id >= dev->data->nb_tx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
> queue_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (txq_data == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx
> queue %u data to NULL\n",
> > +			port_id, queue_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (dev->data->tx_queues == NULL ||
> > +			dev->data->tx_queues[queue_id] == NULL) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			   "Tx queue %"PRIu16" of device with port_id=%"
> > +			   PRIu16" has not been setup\n",
> > +			   queue_id, port_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (*dev->dev_ops->txq_data_get == NULL)
> > +		return -ENOTSUP;
> > +
> > +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
> > +
> > +	return 0;
> > +}
> > +
> >   static int
> >   rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
> >   			     uint16_t n_seg, uint32_t *mbp_buf_size, diff --git
> > a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > 2e783536c1..daf7f05d62 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
> >   	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
> >   } __rte_cache_min_aligned;
> >
> > +/**
> > + * @internal
> > + * Structure used to hold pointers to internal ethdev Tx data.
> > + * The main purpose is to load and store Tx queue data in direct rearm
> mode.
> > + */
> > +struct rte_eth_txq_data {
> > +	uint64_t *offloads;
> > +	void *tx_sw_ring;
> > +	volatile void *tx_ring;
> > +	uint16_t *tx_next_dd;
> > +	uint16_t *nb_tx_free;
> > +	uint16_t nb_tx_desc;
> > +	uint16_t tx_rs_thresh;
> > +	uint16_t tx_free_thresh;
> > +} __rte_cache_min_aligned;
> > +
> 
> first of all it is not clear why this struct has to be in public header, why it can't
> be in on of ethdev 'private' headers.
> Second it looks like a snippet from private txq fields for some Intel (and alike)
> PMDs (i40e, ice, etc.).
> How it supposed to to be universal and be applicable for any PMD that
> decides to implement this new API?
> 
> 
> >   /* Generic Burst mode flag definition, values can be ORed. */
> >
> >   /**
> > @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
> port_id, uint16_t queue_id,
> >   int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
> >   		const struct rte_eth_rxtx_callback *user_cb);
> >
> > +/**
> > + * Get the address which Tx data is stored.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param queue_id
> > + *   The Tx queue on the Ethernet device for which information
> > + *   will be retrieved.
> > + * @param txq_data
> > + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> > + *
> > + * @return
> > + *   - 0: Success
> > + *   - -ENODEV:  If *port_id* is invalid.
> > + *   - -ENOTSUP: routine is not supported by the device PMD.
> > + *   - -EINVAL:  The queue_id is out of range.
> > + */
> > +__rte_experimental
> > +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> > +		struct rte_eth_txq_data *txq_data);
> > +
> >   /**
> >    * Retrieve information about given port's Rx queue.
> >    *
> > @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
> queue_id,
> >   	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
> >   }
> >
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > +notice
> > + *
> > + * Put Tx buffers into Rx sw-ring and rearm descs.
> > + *
> > + * @param port_id
> > + *   Port identifying the receive side.
> > + * @param queue_id
> > + *   The index of the transmit queue identifying the receive side.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> supplied
> > + *   to rte_eth_dev_configure().
> > + * @param txq_data
> > + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> > + * @return
> > + *   The number of direct-rearmed buffers.
> > + */
> > +__rte_experimental
> > +static __rte_always_inline uint16_t
> > +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
> > +		struct rte_eth_txq_data *txq_data)
> > +{
> > +	uint16_t nb_rearm;
> > +	struct rte_eth_fp_ops *p;
> > +	void *qd;
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			"Invalid port_id=%u or queue_id=%u\n",
> > +			port_id, queue_id);
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	p = &rte_eth_fp_ops[port_id];
> > +	qd = p->rxq.data[queue_id];
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
> > +
> > +	if (qd == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> port_id=%u\n",
> > +			queue_id, port_id);
> > +		return 0;
> > +	}
> > +
> > +	if (!p->rx_direct_rearm)
> 
> This check should be done always (unconditionally).
> it is not a mandatory function for the driver (it can safely skip to implement it).
> 
> > +		return -ENOTSUP;
> 
> This function returns uint16_t, why signed integers here?
> 
> 
> > +#endif
> > +
> > +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
> 
> So rx_direct_rearm() function knows how to extract data from TX queue?
> As I understand that is possible only in one case:
> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
> That means that rxq and txq have to belong to the same driver and device
> type.
> 
Thanks for the comments, and I have some questions for this.

> First of all, I still think it is not the best design choice.
> If we going ahead with introducing this feature, it better be as generic as
> possible.
> Plus it mixes TX and RX code-paths together, while it would be much better
> to to keep them independent as they are right now.
> 
> Another thing with such approach - even for the same PMD, both TXQ and
> RXQ can have different internal data format and behavior logic (depending
> on port/queue configuration).
1. Here TXQ and RXQ have different internal format means the queue type 
and  descs can be different, right? If I understand correctly, based on your 
first strategy, is it means we will need different 'rearm_func' for different
queue type in the same PMD?

> So rx_direct_rearm() function selection have to be done based on both RXQ
> and TXQ config.
> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
> rx_queue, tx_port, tx_queue);
> Then, it will be user responsibility to store it somewhere and call periodically:
> 
> control_path:
> 	...
> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
> 		 txport, txqueue);
> data-path:
> 	while(...) {
> 		rearm_func(rxport, txport, rxqueue, txqueue);
> 		rte_eth_rx_burst(rxport, rxqueue, ....);
> 		rte_eth_tx_burst(txport, txqueue, ....);
> 	}
> 
> 
> In that case there seems absolutely no point to introduce struct
> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data directly
> anyway.
2. This is a very good proposal and it will be our first choice. Before working on it,
I have a few questions about how to implement 'rearm_func'. 
Like you say above, mixed Rx and Tx path code in 'rearm_func' means the hard-code
is mixed like:
rearm_func(...) {
     ...
    txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
    for (...) {
       rxep[i].mbuf = txep[i].mbuf;
       mb0 = txep[i].mbuf;
       paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
      dma_addr0 = vdupq_n_u64(paddr);
      vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
    }
}
Is my understanding is right?

> 
> Another way - make rte_eth_txq_data totally opaque and allow PMD to
> store there some data that will help it to distinguish expected TXQ format.
> That will allow PMD to keep rx_direct_rearm() the same for all supported
> TXQ formats (it will make decision internally based on data stored in
> txq_data).
> Though in that case you'll probably need one more dev-op to free txq_data.
  
Konstantin Ananyev Oct. 11, 2022, 10:21 p.m. UTC | #3
>>> Add API for enabling direct rearm mode and for mapping RX and TX
>>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
>>>
>>> Furthermore, to avoid Rx load Tx data directly, add API called
>>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
>>>
>>> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>> ---
>>>    lib/ethdev/ethdev_driver.h   |  9 ++++
>>>    lib/ethdev/ethdev_private.c  |  1 +
>>>    lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
>>>    lib/ethdev/rte_ethdev.h      | 95
>> ++++++++++++++++++++++++++++++++++++
>>>    lib/ethdev/rte_ethdev_core.h |  5 ++
>>>    lib/ethdev/version.map       |  4 ++
>>>    6 files changed, 151 insertions(+)
>>>
>>> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
>>> index 47a55a419e..14f52907c1 100644
>>> --- a/lib/ethdev/ethdev_driver.h
>>> +++ b/lib/ethdev/ethdev_driver.h
>>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
>>>    	eth_rx_descriptor_status_t rx_descriptor_status;
>>>    	/** Check the status of a Tx descriptor */
>>>    	eth_tx_descriptor_status_t tx_descriptor_status;
>>> +	/**  Use Tx mbufs for Rx to rearm */
>>> +	eth_rx_direct_rearm_t rx_direct_rearm;
>>>
>>>    	/**
>>>    	 * Device data that is shared between primary and secondary
>>> processes @@ -486,6 +488,11 @@ typedef int
>> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>>>    typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
>>>    				    uint16_t rx_queue_id);
>>>
>>> +/**< @internal Get Tx information of a transmit queue of an Ethernet
>>> +device. */ typedef void (*eth_txq_data_get_t)(struct rte_eth_dev *dev,
>>> +				      uint16_t tx_queue_id,
>>> +				      struct rte_eth_txq_data *txq_data);
>>> +
>>>    /** @internal Release memory resources allocated by given Rx/Tx queue.
>> */
>>>    typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
>>>    				    uint16_t queue_id);
>>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
>>>    	eth_rxq_info_get_t         rxq_info_get;
>>>    	/** Retrieve Tx queue information */
>>>    	eth_txq_info_get_t         txq_info_get;
>>> +	/** Get the address where Tx data is stored */
>>> +	eth_txq_data_get_t         txq_data_get;
>>>    	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst
>> mode */
>>>    	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst
>> mode */
>>>    	eth_fw_version_get_t       fw_version_get; /**< Get firmware
>> version */
>>> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
>>> index 48090c879a..bfe16c7d77 100644
>>> --- a/lib/ethdev/ethdev_private.c
>>> +++ b/lib/ethdev/ethdev_private.c
>>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops
>> *fpo,
>>>    	fpo->rx_queue_count = dev->rx_queue_count;
>>>    	fpo->rx_descriptor_status = dev->rx_descriptor_status;
>>>    	fpo->tx_descriptor_status = dev->tx_descriptor_status;
>>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
>>>
>>>    	fpo->rxq.data = dev->data->rx_queues;
>>>    	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
>>> 0c2c1088c0..0dccec2e4b 100644
>>> --- a/lib/ethdev/rte_ethdev.c
>>> +++ b/lib/ethdev/rte_ethdev.c
>>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
>>>    	return ret;
>>>    }
>>>
>>> +int
>>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
>>> +			struct rte_eth_txq_data *txq_data) {
>>> +	struct rte_eth_dev *dev;
>>> +
>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>> +	dev = &rte_eth_devices[port_id];
>>> +
>>> +	if (queue_id >= dev->data->nb_tx_queues) {
>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
>> queue_id);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (txq_data == NULL) {
>>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx
>> queue %u data to NULL\n",
>>> +			port_id, queue_id);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (dev->data->tx_queues == NULL ||
>>> +			dev->data->tx_queues[queue_id] == NULL) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			   "Tx queue %"PRIu16" of device with port_id=%"
>>> +			   PRIu16" has not been setup\n",
>>> +			   queue_id, port_id);
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	if (*dev->dev_ops->txq_data_get == NULL)
>>> +		return -ENOTSUP;
>>> +
>>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>    static int
>>>    rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
>>>    			     uint16_t n_seg, uint32_t *mbp_buf_size, diff --git
>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
>>> 2e783536c1..daf7f05d62 100644
>>> --- a/lib/ethdev/rte_ethdev.h
>>> +++ b/lib/ethdev/rte_ethdev.h
>>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
>>>    	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
>>>    } __rte_cache_min_aligned;
>>>
>>> +/**
>>> + * @internal
>>> + * Structure used to hold pointers to internal ethdev Tx data.
>>> + * The main purpose is to load and store Tx queue data in direct rearm
>> mode.
>>> + */
>>> +struct rte_eth_txq_data {
>>> +	uint64_t *offloads;
>>> +	void *tx_sw_ring;
>>> +	volatile void *tx_ring;
>>> +	uint16_t *tx_next_dd;
>>> +	uint16_t *nb_tx_free;
>>> +	uint16_t nb_tx_desc;
>>> +	uint16_t tx_rs_thresh;
>>> +	uint16_t tx_free_thresh;
>>> +} __rte_cache_min_aligned;
>>> +
>>
>> first of all it is not clear why this struct has to be in public header, why it can't
>> be in on of ethdev 'private' headers.
>> Second it looks like a snippet from private txq fields for some Intel (and alike)
>> PMDs (i40e, ice, etc.).
>> How it supposed to to be universal and be applicable for any PMD that
>> decides to implement this new API?
>>
>>
>>>    /* Generic Burst mode flag definition, values can be ORed. */
>>>
>>>    /**
>>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
>> port_id, uint16_t queue_id,
>>>    int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
>>>    		const struct rte_eth_rxtx_callback *user_cb);
>>>
>>> +/**
>>> + * Get the address which Tx data is stored.
>>> + *
>>> + * @param port_id
>>> + *   The port identifier of the Ethernet device.
>>> + * @param queue_id
>>> + *   The Tx queue on the Ethernet device for which information
>>> + *   will be retrieved.
>>> + * @param txq_data
>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>> + *
>>> + * @return
>>> + *   - 0: Success
>>> + *   - -ENODEV:  If *port_id* is invalid.
>>> + *   - -ENOTSUP: routine is not supported by the device PMD.
>>> + *   - -EINVAL:  The queue_id is out of range.
>>> + */
>>> +__rte_experimental
>>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
>>> +		struct rte_eth_txq_data *txq_data);
>>> +
>>>    /**
>>>     * Retrieve information about given port's Rx queue.
>>>     *
>>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
>> queue_id,
>>>    	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>>>    }
>>>
>>> +/**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>>> +notice
>>> + *
>>> + * Put Tx buffers into Rx sw-ring and rearm descs.
>>> + *
>>> + * @param port_id
>>> + *   Port identifying the receive side.
>>> + * @param queue_id
>>> + *   The index of the transmit queue identifying the receive side.
>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
>> supplied
>>> + *   to rte_eth_dev_configure().
>>> + * @param txq_data
>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>> + * @return
>>> + *   The number of direct-rearmed buffers.
>>> + */
>>> +__rte_experimental
>>> +static __rte_always_inline uint16_t
>>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
>>> +		struct rte_eth_txq_data *txq_data)
>>> +{
>>> +	uint16_t nb_rearm;
>>> +	struct rte_eth_fp_ops *p;
>>> +	void *qd;
>>> +
>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>> +	if (port_id >= RTE_MAX_ETHPORTS ||
>>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			"Invalid port_id=%u or queue_id=%u\n",
>>> +			port_id, queue_id);
>>> +		return 0;
>>> +	}
>>> +#endif
>>> +
>>> +	p = &rte_eth_fp_ops[port_id];
>>> +	qd = p->rxq.data[queue_id];
>>> +
>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
>>> +
>>> +	if (qd == NULL) {
>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
>> port_id=%u\n",
>>> +			queue_id, port_id);
>>> +		return 0;
>>> +	}
>>> +
>>> +	if (!p->rx_direct_rearm)
>>
>> This check should be done always (unconditionally).
>> it is not a mandatory function for the driver (it can safely skip to implement it).
>>
>>> +		return -ENOTSUP;
>>
>> This function returns uint16_t, why signed integers here?
>>
>>
>>> +#endif
>>> +
>>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
>>
>> So rx_direct_rearm() function knows how to extract data from TX queue?
>> As I understand that is possible only in one case:
>> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
>> That means that rxq and txq have to belong to the same driver and device
>> type.
>>
> Thanks for the comments, and I have some questions for this.
> 
>> First of all, I still think it is not the best design choice.
>> If we going ahead with introducing this feature, it better be as generic as
>> possible.
>> Plus it mixes TX and RX code-paths together, while it would be much better
>> to to keep them independent as they are right now.
>>
>> Another thing with such approach - even for the same PMD, both TXQ and
>> RXQ can have different internal data format and behavior logic (depending
>> on port/queue configuration).
> 1. Here TXQ and RXQ have different internal format means the queue type
> and  descs can be different, right? If I understand correctly, based on your
> first strategy, is it means we will need different 'rearm_func' for different
> queue type in the same PMD?

Yes, I think so.
If let say we have some PMD where depending on the config,
there cpuld be 2 different RXQ formats: rxq_a and rxq_b,
and 2 different txq formats: txq_c, txq_d.
Then assuming PMD would like to support direct-rearm mode for all four 
combinations, it needs 4 different rearm functions:

rearm_txq_c_to_rxq_a()
rearm_txq_c_to_rxq_b()
rearm_txq_d_to_rxq_a()
rearm_txq_d_to_rxq_b()


> 
>> So rx_direct_rearm() function selection have to be done based on both RXQ
>> and TXQ config.
>> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
>> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
>> rx_queue, tx_port, tx_queue);
>> Then, it will be user responsibility to store it somewhere and call periodically:
>>
>> control_path:
>> 	...
>> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
>> 		 txport, txqueue);
>> data-path:
>> 	while(...) {
>> 		rearm_func(rxport, txport, rxqueue, txqueue);
>> 		rte_eth_rx_burst(rxport, rxqueue, ....);
>> 		rte_eth_tx_burst(txport, txqueue, ....);
>> 	}
>>
>>
>> In that case there seems absolutely no point to introduce struct
>> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data directly
>> anyway.
> 2. This is a very good proposal and it will be our first choice. Before working on it,
> I have a few questions about how to implement 'rearm_func'.
> Like you say above, mixed Rx and Tx path code in 'rearm_func' means the hard-code
> is mixed like:
> rearm_func(...) {
>       ...
>      txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
>      for (...) {
>         rxep[i].mbuf = txep[i].mbuf;
>         mb0 = txep[i].mbuf;
>         paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
>        dma_addr0 = vdupq_n_u64(paddr);
>        vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
>      }
> }
> Is my understanding is right?


Sorry, I don't understand the question.
Can you probably elaborate a bit?

> 
>>
>> Another way - make rte_eth_txq_data totally opaque and allow PMD to
>> store there some data that will help it to distinguish expected TXQ format.
>> That will allow PMD to keep rx_direct_rearm() the same for all supported
>> TXQ formats (it will make decision internally based on data stored in
>> txq_data).
>> Though in that case you'll probably need one more dev-op to free txq_data.
>
  
Feifei Wang Oct. 12, 2022, 1:38 p.m. UTC | #4
> -----邮件原件-----
> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> 发送时间: Wednesday, October 12, 2022 6:21 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
> Ferruh Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm mode
> 
> 
> 
> >>> Add API for enabling direct rearm mode and for mapping RX and TX
> >>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
> >>>
> >>> Furthermore, to avoid Rx load Tx data directly, add API called
> >>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
> >>>
> >>> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >>> ---
> >>>    lib/ethdev/ethdev_driver.h   |  9 ++++
> >>>    lib/ethdev/ethdev_private.c  |  1 +
> >>>    lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
> >>>    lib/ethdev/rte_ethdev.h      | 95
> >> ++++++++++++++++++++++++++++++++++++
> >>>    lib/ethdev/rte_ethdev_core.h |  5 ++
> >>>    lib/ethdev/version.map       |  4 ++
> >>>    6 files changed, 151 insertions(+)
> >>>
> >>> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> >>> index 47a55a419e..14f52907c1 100644
> >>> --- a/lib/ethdev/ethdev_driver.h
> >>> +++ b/lib/ethdev/ethdev_driver.h
> >>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
> >>>    	eth_rx_descriptor_status_t rx_descriptor_status;
> >>>    	/** Check the status of a Tx descriptor */
> >>>    	eth_tx_descriptor_status_t tx_descriptor_status;
> >>> +	/**  Use Tx mbufs for Rx to rearm */
> >>> +	eth_rx_direct_rearm_t rx_direct_rearm;
> >>>
> >>>    	/**
> >>>    	 * Device data that is shared between primary and secondary
> >>> processes @@ -486,6 +488,11 @@ typedef int
> >> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >>>    typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> >>>    				    uint16_t rx_queue_id);
> >>>
> >>> +/**< @internal Get Tx information of a transmit queue of an
> >>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct
> rte_eth_dev *dev,
> >>> +				      uint16_t tx_queue_id,
> >>> +				      struct rte_eth_txq_data *txq_data);
> >>> +
> >>>    /** @internal Release memory resources allocated by given Rx/Tx
> queue.
> >> */
> >>>    typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
> >>>    				    uint16_t queue_id);
> >>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
> >>>    	eth_rxq_info_get_t         rxq_info_get;
> >>>    	/** Retrieve Tx queue information */
> >>>    	eth_txq_info_get_t         txq_info_get;
> >>> +	/** Get the address where Tx data is stored */
> >>> +	eth_txq_data_get_t         txq_data_get;
> >>>    	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst
> >> mode */
> >>>    	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst
> >> mode */
> >>>    	eth_fw_version_get_t       fw_version_get; /**< Get firmware
> >> version */
> >>> diff --git a/lib/ethdev/ethdev_private.c
> >>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77 100644
> >>> --- a/lib/ethdev/ethdev_private.c
> >>> +++ b/lib/ethdev/ethdev_private.c
> >>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops
> >> *fpo,
> >>>    	fpo->rx_queue_count = dev->rx_queue_count;
> >>>    	fpo->rx_descriptor_status = dev->rx_descriptor_status;
> >>>    	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> >>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
> >>>
> >>>    	fpo->rxq.data = dev->data->rx_queues;
> >>>    	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> >>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
> >>> 0c2c1088c0..0dccec2e4b 100644
> >>> --- a/lib/ethdev/rte_ethdev.c
> >>> +++ b/lib/ethdev/rte_ethdev.c
> >>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
> >>>    	return ret;
> >>>    }
> >>>
> >>> +int
> >>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> >>> +			struct rte_eth_txq_data *txq_data) {
> >>> +	struct rte_eth_dev *dev;
> >>> +
> >>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +
> >>> +	if (queue_id >= dev->data->nb_tx_queues) {
> >>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
> >> queue_id);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	if (txq_data == NULL) {
> >>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx
> >> queue %u data to NULL\n",
> >>> +			port_id, queue_id);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	if (dev->data->tx_queues == NULL ||
> >>> +			dev->data->tx_queues[queue_id] == NULL) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			   "Tx queue %"PRIu16" of device with port_id=%"
> >>> +			   PRIu16" has not been setup\n",
> >>> +			   queue_id, port_id);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	if (*dev->dev_ops->txq_data_get == NULL)
> >>> +		return -ENOTSUP;
> >>> +
> >>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>>    static int
> >>>    rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split
> *rx_seg,
> >>>    			     uint16_t n_seg, uint32_t *mbp_buf_size, diff --git
> >>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> >>> 2e783536c1..daf7f05d62 100644
> >>> --- a/lib/ethdev/rte_ethdev.h
> >>> +++ b/lib/ethdev/rte_ethdev.h
> >>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
> >>>    	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
> >>>    } __rte_cache_min_aligned;
> >>>
> >>> +/**
> >>> + * @internal
> >>> + * Structure used to hold pointers to internal ethdev Tx data.
> >>> + * The main purpose is to load and store Tx queue data in direct
> >>> +rearm
> >> mode.
> >>> + */
> >>> +struct rte_eth_txq_data {
> >>> +	uint64_t *offloads;
> >>> +	void *tx_sw_ring;
> >>> +	volatile void *tx_ring;
> >>> +	uint16_t *tx_next_dd;
> >>> +	uint16_t *nb_tx_free;
> >>> +	uint16_t nb_tx_desc;
> >>> +	uint16_t tx_rs_thresh;
> >>> +	uint16_t tx_free_thresh;
> >>> +} __rte_cache_min_aligned;
> >>> +
> >>
> >> first of all it is not clear why this struct has to be in public
> >> header, why it can't be in on of ethdev 'private' headers.
> >> Second it looks like a snippet from private txq fields for some Intel
> >> (and alike) PMDs (i40e, ice, etc.).
> >> How it supposed to to be universal and be applicable for any PMD that
> >> decides to implement this new API?
> >>
> >>
> >>>    /* Generic Burst mode flag definition, values can be ORed. */
> >>>
> >>>    /**
> >>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
> >> port_id, uint16_t queue_id,
> >>>    int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
> >>>    		const struct rte_eth_rxtx_callback *user_cb);
> >>>
> >>> +/**
> >>> + * Get the address which Tx data is stored.
> >>> + *
> >>> + * @param port_id
> >>> + *   The port identifier of the Ethernet device.
> >>> + * @param queue_id
> >>> + *   The Tx queue on the Ethernet device for which information
> >>> + *   will be retrieved.
> >>> + * @param txq_data
> >>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> >>> + *
> >>> + * @return
> >>> + *   - 0: Success
> >>> + *   - -ENODEV:  If *port_id* is invalid.
> >>> + *   - -ENOTSUP: routine is not supported by the device PMD.
> >>> + *   - -EINVAL:  The queue_id is out of range.
> >>> + */
> >>> +__rte_experimental
> >>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> >>> +		struct rte_eth_txq_data *txq_data);
> >>> +
> >>>    /**
> >>>     * Retrieve information about given port's Rx queue.
> >>>     *
> >>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
> >> queue_id,
> >>>    	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
> >>>    }
> >>>
> >>> +/**
> >>> + * @warning
> >>> + * @b EXPERIMENTAL: this API may change, or be removed, without
> >>> +prior notice
> >>> + *
> >>> + * Put Tx buffers into Rx sw-ring and rearm descs.
> >>> + *
> >>> + * @param port_id
> >>> + *   Port identifying the receive side.
> >>> + * @param queue_id
> >>> + *   The index of the transmit queue identifying the receive side.
> >>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
> >> supplied
> >>> + *   to rte_eth_dev_configure().
> >>> + * @param txq_data
> >>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> >>> + * @return
> >>> + *   The number of direct-rearmed buffers.
> >>> + */
> >>> +__rte_experimental
> >>> +static __rte_always_inline uint16_t
> >>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
> >>> +		struct rte_eth_txq_data *txq_data) {
> >>> +	uint16_t nb_rearm;
> >>> +	struct rte_eth_fp_ops *p;
> >>> +	void *qd;
> >>> +
> >>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>> +	if (port_id >= RTE_MAX_ETHPORTS ||
> >>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			"Invalid port_id=%u or queue_id=%u\n",
> >>> +			port_id, queue_id);
> >>> +		return 0;
> >>> +	}
> >>> +#endif
> >>> +
> >>> +	p = &rte_eth_fp_ops[port_id];
> >>> +	qd = p->rxq.data[queue_id];
> >>> +
> >>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
> >>> +
> >>> +	if (qd == NULL) {
> >>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> >> port_id=%u\n",
> >>> +			queue_id, port_id);
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	if (!p->rx_direct_rearm)
> >>
> >> This check should be done always (unconditionally).
> >> it is not a mandatory function for the driver (it can safely skip to
> implement it).
> >>
> >>> +		return -ENOTSUP;
> >>
> >> This function returns uint16_t, why signed integers here?
> >>
> >>
> >>> +#endif
> >>> +
> >>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
> >>
> >> So rx_direct_rearm() function knows how to extract data from TX queue?
> >> As I understand that is possible only in one case:
> >> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
> >> That means that rxq and txq have to belong to the same driver and
> >> device type.
> >>
> > Thanks for the comments, and I have some questions for this.
> >
> >> First of all, I still think it is not the best design choice.
> >> If we going ahead with introducing this feature, it better be as
> >> generic as possible.
> >> Plus it mixes TX and RX code-paths together, while it would be much
> >> better to to keep them independent as they are right now.
> >>
> >> Another thing with such approach - even for the same PMD, both TXQ
> >> and RXQ can have different internal data format and behavior logic
> >> (depending on port/queue configuration).
> > 1. Here TXQ and RXQ have different internal format means the queue
> > type and  descs can be different, right? If I understand correctly,
> > based on your first strategy, is it means we will need different
> > 'rearm_func' for different queue type in the same PMD?
> 
> Yes, I think so.
> If let say we have some PMD where depending on the config, there cpuld be
> 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq formats: txq_c,
> txq_d.
> Then assuming PMD would like to support direct-rearm mode for all four
> combinations, it needs 4 different rearm functions:
> 
> rearm_txq_c_to_rxq_a()
> rearm_txq_c_to_rxq_b()
> rearm_txq_d_to_rxq_a()
> rearm_txq_d_to_rxq_b()
> 
Thank you for your detailed explanation, I can understand this.
> 
> >
> >> So rx_direct_rearm() function selection have to be done based on both
> >> RXQ and TXQ config.
> >> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
> >> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
> >> rx_queue, tx_port, tx_queue);
> >> Then, it will be user responsibility to store it somewhere and call
> periodically:
> >>
> >> control_path:
> >> 	...
> >> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
> >> 		 txport, txqueue);
> >> data-path:
> >> 	while(...) {
> >> 		rearm_func(rxport, txport, rxqueue, txqueue);
> >> 		rte_eth_rx_burst(rxport, rxqueue, ....);
> >> 		rte_eth_tx_burst(txport, txqueue, ....);
> >> 	}
> >>
> >>
> >> In that case there seems absolutely no point to introduce struct
> >> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
> >> directly anyway.
> > 2. This is a very good proposal and it will be our first choice.
> > Before working on it, I have a few questions about how to implement
> 'rearm_func'.
> > Like you say above, mixed Rx and Tx path code in 'rearm_func' means
> > the hard-code is mixed like:
> > rearm_func(...) {
> >       ...
> >      txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
> >      for (...) {
> >         rxep[i].mbuf = txep[i].mbuf;
> >         mb0 = txep[i].mbuf;
> >         paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
> >        dma_addr0 = vdupq_n_u64(paddr);
> >        vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
> >      }
> > }
> > Is my understanding is right?
> 
> 
> Sorry, I don't understand the question.
> Can you probably elaborate a bit?

Sorry for my unclear expression.

I mean if we need two func which contains tx and rx paths code respectively in rearm_func, like:
rearm_func(...) {
         rte_tx_fill_sw_ring;
         rte_rx_rearm_descs;
}

Or just mixed tx and rx path code like I said before. I prefer 'rx and tx hard code mixed', 
because from the performance perspective, this can reduce the cost of function calls.
> 
> >
> >>
> >> Another way - make rte_eth_txq_data totally opaque and allow PMD to
> >> store there some data that will help it to distinguish expected TXQ format.
> >> That will allow PMD to keep rx_direct_rearm() the same for all
> >> supported TXQ formats (it will make decision internally based on data
> >> stored in txq_data).
> >> Though in that case you'll probably need one more dev-op to free
> txq_data.
> >
  
Konstantin Ananyev Oct. 13, 2022, 9:48 a.m. UTC | #5
12/10/2022 14:38, Feifei Wang пишет:
> 
> 
>> -----邮件原件-----
>> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
>> 发送时间: Wednesday, October 12, 2022 6:21 AM
>> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
>> Ferruh Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
>> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
>> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
>> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
>> <Ruifeng.Wang@arm.com>
>> 主题: Re: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm mode
>>
>>
>>
>>>>> Add API for enabling direct rearm mode and for mapping RX and TX
>>>>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
>>>>>
>>>>> Furthermore, to avoid Rx load Tx data directly, add API called
>>>>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
>>>>>
>>>>> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>>>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>>>> ---
>>>>>     lib/ethdev/ethdev_driver.h   |  9 ++++
>>>>>     lib/ethdev/ethdev_private.c  |  1 +
>>>>>     lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
>>>>>     lib/ethdev/rte_ethdev.h      | 95
>>>> ++++++++++++++++++++++++++++++++++++
>>>>>     lib/ethdev/rte_ethdev_core.h |  5 ++
>>>>>     lib/ethdev/version.map       |  4 ++
>>>>>     6 files changed, 151 insertions(+)
>>>>>
>>>>> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
>>>>> index 47a55a419e..14f52907c1 100644
>>>>> --- a/lib/ethdev/ethdev_driver.h
>>>>> +++ b/lib/ethdev/ethdev_driver.h
>>>>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
>>>>>     	eth_rx_descriptor_status_t rx_descriptor_status;
>>>>>     	/** Check the status of a Tx descriptor */
>>>>>     	eth_tx_descriptor_status_t tx_descriptor_status;
>>>>> +	/**  Use Tx mbufs for Rx to rearm */
>>>>> +	eth_rx_direct_rearm_t rx_direct_rearm;
>>>>>
>>>>>     	/**
>>>>>     	 * Device data that is shared between primary and secondary
>>>>> processes @@ -486,6 +488,11 @@ typedef int
>>>> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>>>>>     typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
>>>>>     				    uint16_t rx_queue_id);
>>>>>
>>>>> +/**< @internal Get Tx information of a transmit queue of an
>>>>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct
>> rte_eth_dev *dev,
>>>>> +				      uint16_t tx_queue_id,
>>>>> +				      struct rte_eth_txq_data *txq_data);
>>>>> +
>>>>>     /** @internal Release memory resources allocated by given Rx/Tx
>> queue.
>>>> */
>>>>>     typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
>>>>>     				    uint16_t queue_id);
>>>>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
>>>>>     	eth_rxq_info_get_t         rxq_info_get;
>>>>>     	/** Retrieve Tx queue information */
>>>>>     	eth_txq_info_get_t         txq_info_get;
>>>>> +	/** Get the address where Tx data is stored */
>>>>> +	eth_txq_data_get_t         txq_data_get;
>>>>>     	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst
>>>> mode */
>>>>>     	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst
>>>> mode */
>>>>>     	eth_fw_version_get_t       fw_version_get; /**< Get firmware
>>>> version */
>>>>> diff --git a/lib/ethdev/ethdev_private.c
>>>>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77 100644
>>>>> --- a/lib/ethdev/ethdev_private.c
>>>>> +++ b/lib/ethdev/ethdev_private.c
>>>>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct rte_eth_fp_ops
>>>> *fpo,
>>>>>     	fpo->rx_queue_count = dev->rx_queue_count;
>>>>>     	fpo->rx_descriptor_status = dev->rx_descriptor_status;
>>>>>     	fpo->tx_descriptor_status = dev->tx_descriptor_status;
>>>>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
>>>>>
>>>>>     	fpo->rxq.data = dev->data->rx_queues;
>>>>>     	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
>>>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c index
>>>>> 0c2c1088c0..0dccec2e4b 100644
>>>>> --- a/lib/ethdev/rte_ethdev.c
>>>>> +++ b/lib/ethdev/rte_ethdev.c
>>>>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
>>>>>     	return ret;
>>>>>     }
>>>>>
>>>>> +int
>>>>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
>>>>> +			struct rte_eth_txq_data *txq_data) {
>>>>> +	struct rte_eth_dev *dev;
>>>>> +
>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>>>> +	dev = &rte_eth_devices[port_id];
>>>>> +
>>>>> +	if (queue_id >= dev->data->nb_tx_queues) {
>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
>>>> queue_id);
>>>>> +		return -EINVAL;
>>>>> +	}
>>>>> +
>>>>> +	if (txq_data == NULL) {
>>>>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx
>>>> queue %u data to NULL\n",
>>>>> +			port_id, queue_id);
>>>>> +		return -EINVAL;
>>>>> +	}
>>>>> +
>>>>> +	if (dev->data->tx_queues == NULL ||
>>>>> +			dev->data->tx_queues[queue_id] == NULL) {
>>>>> +		RTE_ETHDEV_LOG(ERR,
>>>>> +			   "Tx queue %"PRIu16" of device with port_id=%"
>>>>> +			   PRIu16" has not been setup\n",
>>>>> +			   queue_id, port_id);
>>>>> +		return -EINVAL;
>>>>> +	}
>>>>> +
>>>>> +	if (*dev->dev_ops->txq_data_get == NULL)
>>>>> +		return -ENOTSUP;
>>>>> +
>>>>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>     static int
>>>>>     rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split
>> *rx_seg,
>>>>>     			     uint16_t n_seg, uint32_t *mbp_buf_size, diff --git
>>>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
>>>>> 2e783536c1..daf7f05d62 100644
>>>>> --- a/lib/ethdev/rte_ethdev.h
>>>>> +++ b/lib/ethdev/rte_ethdev.h
>>>>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
>>>>>     	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
>>>>>     } __rte_cache_min_aligned;
>>>>>
>>>>> +/**
>>>>> + * @internal
>>>>> + * Structure used to hold pointers to internal ethdev Tx data.
>>>>> + * The main purpose is to load and store Tx queue data in direct
>>>>> +rearm
>>>> mode.
>>>>> + */
>>>>> +struct rte_eth_txq_data {
>>>>> +	uint64_t *offloads;
>>>>> +	void *tx_sw_ring;
>>>>> +	volatile void *tx_ring;
>>>>> +	uint16_t *tx_next_dd;
>>>>> +	uint16_t *nb_tx_free;
>>>>> +	uint16_t nb_tx_desc;
>>>>> +	uint16_t tx_rs_thresh;
>>>>> +	uint16_t tx_free_thresh;
>>>>> +} __rte_cache_min_aligned;
>>>>> +
>>>>
>>>> first of all it is not clear why this struct has to be in public
>>>> header, why it can't be in on of ethdev 'private' headers.
>>>> Second it looks like a snippet from private txq fields for some Intel
>>>> (and alike) PMDs (i40e, ice, etc.).
>>>> How it supposed to to be universal and be applicable for any PMD that
>>>> decides to implement this new API?
>>>>
>>>>
>>>>>     /* Generic Burst mode flag definition, values can be ORed. */
>>>>>
>>>>>     /**
>>>>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
>>>> port_id, uint16_t queue_id,
>>>>>     int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
>>>>>     		const struct rte_eth_rxtx_callback *user_cb);
>>>>>
>>>>> +/**
>>>>> + * Get the address which Tx data is stored.
>>>>> + *
>>>>> + * @param port_id
>>>>> + *   The port identifier of the Ethernet device.
>>>>> + * @param queue_id
>>>>> + *   The Tx queue on the Ethernet device for which information
>>>>> + *   will be retrieved.
>>>>> + * @param txq_data
>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>>>> + *
>>>>> + * @return
>>>>> + *   - 0: Success
>>>>> + *   - -ENODEV:  If *port_id* is invalid.
>>>>> + *   - -ENOTSUP: routine is not supported by the device PMD.
>>>>> + *   - -EINVAL:  The queue_id is out of range.
>>>>> + */
>>>>> +__rte_experimental
>>>>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
>>>>> +		struct rte_eth_txq_data *txq_data);
>>>>> +
>>>>>     /**
>>>>>      * Retrieve information about given port's Rx queue.
>>>>>      *
>>>>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
>>>> queue_id,
>>>>>     	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>>>>>     }
>>>>>
>>>>> +/**
>>>>> + * @warning
>>>>> + * @b EXPERIMENTAL: this API may change, or be removed, without
>>>>> +prior notice
>>>>> + *
>>>>> + * Put Tx buffers into Rx sw-ring and rearm descs.
>>>>> + *
>>>>> + * @param port_id
>>>>> + *   Port identifying the receive side.
>>>>> + * @param queue_id
>>>>> + *   The index of the transmit queue identifying the receive side.
>>>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
>>>> supplied
>>>>> + *   to rte_eth_dev_configure().
>>>>> + * @param txq_data
>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>>>> + * @return
>>>>> + *   The number of direct-rearmed buffers.
>>>>> + */
>>>>> +__rte_experimental
>>>>> +static __rte_always_inline uint16_t
>>>>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
>>>>> +		struct rte_eth_txq_data *txq_data) {
>>>>> +	uint16_t nb_rearm;
>>>>> +	struct rte_eth_fp_ops *p;
>>>>> +	void *qd;
>>>>> +
>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>>>> +	if (port_id >= RTE_MAX_ETHPORTS ||
>>>>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
>>>>> +		RTE_ETHDEV_LOG(ERR,
>>>>> +			"Invalid port_id=%u or queue_id=%u\n",
>>>>> +			port_id, queue_id);
>>>>> +		return 0;
>>>>> +	}
>>>>> +#endif
>>>>> +
>>>>> +	p = &rte_eth_fp_ops[port_id];
>>>>> +	qd = p->rxq.data[queue_id];
>>>>> +
>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
>>>>> +
>>>>> +	if (qd == NULL) {
>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
>>>> port_id=%u\n",
>>>>> +			queue_id, port_id);
>>>>> +		return 0;
>>>>> +	}
>>>>> +
>>>>> +	if (!p->rx_direct_rearm)
>>>>
>>>> This check should be done always (unconditionally).
>>>> it is not a mandatory function for the driver (it can safely skip to
>> implement it).
>>>>
>>>>> +		return -ENOTSUP;
>>>>
>>>> This function returns uint16_t, why signed integers here?
>>>>
>>>>
>>>>> +#endif
>>>>> +
>>>>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
>>>>
>>>> So rx_direct_rearm() function knows how to extract data from TX queue?
>>>> As I understand that is possible only in one case:
>>>> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
>>>> That means that rxq and txq have to belong to the same driver and
>>>> device type.
>>>>
>>> Thanks for the comments, and I have some questions for this.
>>>
>>>> First of all, I still think it is not the best design choice.
>>>> If we going ahead with introducing this feature, it better be as
>>>> generic as possible.
>>>> Plus it mixes TX and RX code-paths together, while it would be much
>>>> better to to keep them independent as they are right now.
>>>>
>>>> Another thing with such approach - even for the same PMD, both TXQ
>>>> and RXQ can have different internal data format and behavior logic
>>>> (depending on port/queue configuration).
>>> 1. Here TXQ and RXQ have different internal format means the queue
>>> type and  descs can be different, right? If I understand correctly,
>>> based on your first strategy, is it means we will need different
>>> 'rearm_func' for different queue type in the same PMD?
>>
>> Yes, I think so.
>> If let say we have some PMD where depending on the config, there cpuld be
>> 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq formats: txq_c,
>> txq_d.
>> Then assuming PMD would like to support direct-rearm mode for all four
>> combinations, it needs 4 different rearm functions:
>>
>> rearm_txq_c_to_rxq_a()
>> rearm_txq_c_to_rxq_b()
>> rearm_txq_d_to_rxq_a()
>> rearm_txq_d_to_rxq_b()
>>
> Thank you for your detailed explanation, I can understand this.
>>
>>>
>>>> So rx_direct_rearm() function selection have to be done based on both
>>>> RXQ and TXQ config.
>>>> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
>>>> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
>>>> rx_queue, tx_port, tx_queue);
>>>> Then, it will be user responsibility to store it somewhere and call
>> periodically:
>>>>
>>>> control_path:
>>>> 	...
>>>> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
>>>> 		 txport, txqueue);
>>>> data-path:
>>>> 	while(...) {
>>>> 		rearm_func(rxport, txport, rxqueue, txqueue);
>>>> 		rte_eth_rx_burst(rxport, rxqueue, ....);
>>>> 		rte_eth_tx_burst(txport, txqueue, ....);
>>>> 	}
>>>>
>>>>
>>>> In that case there seems absolutely no point to introduce struct
>>>> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
>>>> directly anyway.
>>> 2. This is a very good proposal and it will be our first choice.
>>> Before working on it, I have a few questions about how to implement
>> 'rearm_func'.
>>> Like you say above, mixed Rx and Tx path code in 'rearm_func' means
>>> the hard-code is mixed like:
>>> rearm_func(...) {
>>>        ...
>>>       txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
>>>       for (...) {
>>>          rxep[i].mbuf = txep[i].mbuf;
>>>          mb0 = txep[i].mbuf;
>>>          paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
>>>         dma_addr0 = vdupq_n_u64(paddr);
>>>         vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
>>>       }
>>> }
>>> Is my understanding is right?
>>
>>
>> Sorry, I don't understand the question.
>> Can you probably elaborate a bit?
> 
> Sorry for my unclear expression.
> 
> I mean if we need two func which contains tx and rx paths code respectively in rearm_func, like:
> rearm_func(...) {
>           rte_tx_fill_sw_ring;
>           rte_rx_rearm_descs;
> }
> 
> Or just mixed tx and rx path code like I said before. I prefer 'rx and tx hard code mixed',
> because from the performance perspective, this can reduce the cost of function calls.

I suppose it depends on what we choose:
If we restrict it in a way that rxq and txq have to belong to
the same PMD, then I suppose this decision could be left to each
particular PMD.
If we'd like to allow rearm to work accross different PMDs
(i.e. it would allow to rearm mlx with ice and visa-versa),
then yes we need PMDs somehow to expose rx_sw_ring abstraction
to each other.
My preference would be the second one - as it will make this feature
more flexible and would help to adopt it more widely.
Though the first one is probably easier to implement,
and as I udnerstand you are leaning towards the first one.

Konstantin



>>
>>>
>>>>
>>>> Another way - make rte_eth_txq_data totally opaque and allow PMD to
>>>> store there some data that will help it to distinguish expected TXQ format.
>>>> That will allow PMD to keep rx_direct_rearm() the same for all
>>>> supported TXQ formats (it will make decision internally based on data
>>>> stored in txq_data).
>>>> Though in that case you'll probably need one more dev-op to free
>> txq_data.
>>>
>
  
Feifei Wang Oct. 26, 2022, 7:35 a.m. UTC | #6
> -----邮件原件-----
> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> 发送时间: Thursday, October 13, 2022 5:49 PM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
> Ferruh Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: 回复: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm mode
> 
> 12/10/2022 14:38, Feifei Wang пишет:
> >
> >
> >> -----邮件原件-----
> >> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> >> 发送时间: Wednesday, October 12, 2022 6:21 AM
> >> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
> Ferruh
> >> Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> >> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
> >> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> >> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> >> 主题: Re: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm mode
> >>
> >>
> >>
> >>>>> Add API for enabling direct rearm mode and for mapping RX and TX
> >>>>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
> >>>>>
> >>>>> Furthermore, to avoid Rx load Tx data directly, add API called
> >>>>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
> >>>>>
> >>>>> Suggested-by: Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>
> >>>>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> >>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>>> Reviewed-by: Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>
> >>>>> ---
> >>>>>     lib/ethdev/ethdev_driver.h   |  9 ++++
> >>>>>     lib/ethdev/ethdev_private.c  |  1 +
> >>>>>     lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
> >>>>>     lib/ethdev/rte_ethdev.h      | 95
> >>>> ++++++++++++++++++++++++++++++++++++
> >>>>>     lib/ethdev/rte_ethdev_core.h |  5 ++
> >>>>>     lib/ethdev/version.map       |  4 ++
> >>>>>     6 files changed, 151 insertions(+)
> >>>>>
> >>>>> diff --git a/lib/ethdev/ethdev_driver.h
> >>>>> b/lib/ethdev/ethdev_driver.h index 47a55a419e..14f52907c1 100644
> >>>>> --- a/lib/ethdev/ethdev_driver.h
> >>>>> +++ b/lib/ethdev/ethdev_driver.h
> >>>>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
> >>>>>     	eth_rx_descriptor_status_t rx_descriptor_status;
> >>>>>     	/** Check the status of a Tx descriptor */
> >>>>>     	eth_tx_descriptor_status_t tx_descriptor_status;
> >>>>> +	/**  Use Tx mbufs for Rx to rearm */
> >>>>> +	eth_rx_direct_rearm_t rx_direct_rearm;
> >>>>>
> >>>>>     	/**
> >>>>>     	 * Device data that is shared between primary and secondary
> >>>>> processes @@ -486,6 +488,11 @@ typedef int
> >>>> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >>>>>     typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> >>>>>     				    uint16_t rx_queue_id);
> >>>>>
> >>>>> +/**< @internal Get Tx information of a transmit queue of an
> >>>>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct
> >> rte_eth_dev *dev,
> >>>>> +				      uint16_t tx_queue_id,
> >>>>> +				      struct rte_eth_txq_data
> *txq_data);
> >>>>> +
> >>>>>     /** @internal Release memory resources allocated by given
> >>>>> Rx/Tx
> >> queue.
> >>>> */
> >>>>>     typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
> >>>>>     				    uint16_t queue_id);
> >>>>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
> >>>>>     	eth_rxq_info_get_t         rxq_info_get;
> >>>>>     	/** Retrieve Tx queue information */
> >>>>>     	eth_txq_info_get_t         txq_info_get;
> >>>>> +	/** Get the address where Tx data is stored */
> >>>>> +	eth_txq_data_get_t         txq_data_get;
> >>>>>     	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx
> burst
> >>>> mode */
> >>>>>     	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx
> burst
> >>>> mode */
> >>>>>     	eth_fw_version_get_t       fw_version_get; /**< Get
> firmware
> >>>> version */
> >>>>> diff --git a/lib/ethdev/ethdev_private.c
> >>>>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77 100644
> >>>>> --- a/lib/ethdev/ethdev_private.c
> >>>>> +++ b/lib/ethdev/ethdev_private.c
> >>>>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct
> rte_eth_fp_ops
> >>>> *fpo,
> >>>>>     	fpo->rx_queue_count = dev->rx_queue_count;
> >>>>>     	fpo->rx_descriptor_status = dev->rx_descriptor_status;
> >>>>>     	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> >>>>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
> >>>>>
> >>>>>     	fpo->rxq.data = dev->data->rx_queues;
> >>>>>     	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
> >>>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> >>>>> index 0c2c1088c0..0dccec2e4b 100644
> >>>>> --- a/lib/ethdev/rte_ethdev.c
> >>>>> +++ b/lib/ethdev/rte_ethdev.c
> >>>>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
> >>>>>     	return ret;
> >>>>>     }
> >>>>>
> >>>>> +int
> >>>>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> >>>>> +			struct rte_eth_txq_data *txq_data) {
> >>>>> +	struct rte_eth_dev *dev;
> >>>>> +
> >>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> >>>>> +	dev = &rte_eth_devices[port_id];
> >>>>> +
> >>>>> +	if (queue_id >= dev->data->nb_tx_queues) {
> >>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
> >>>> queue_id);
> >>>>> +		return -EINVAL;
> >>>>> +	}
> >>>>> +
> >>>>> +	if (txq_data == NULL) {
> >>>>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u
> Tx
> >>>> queue %u data to NULL\n",
> >>>>> +			port_id, queue_id);
> >>>>> +		return -EINVAL;
> >>>>> +	}
> >>>>> +
> >>>>> +	if (dev->data->tx_queues == NULL ||
> >>>>> +			dev->data->tx_queues[queue_id] == NULL) {
> >>>>> +		RTE_ETHDEV_LOG(ERR,
> >>>>> +			   "Tx queue %"PRIu16" of device with
> port_id=%"
> >>>>> +			   PRIu16" has not been setup\n",
> >>>>> +			   queue_id, port_id);
> >>>>> +		return -EINVAL;
> >>>>> +	}
> >>>>> +
> >>>>> +	if (*dev->dev_ops->txq_data_get == NULL)
> >>>>> +		return -ENOTSUP;
> >>>>> +
> >>>>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
> >>>>> +
> >>>>> +	return 0;
> >>>>> +}
> >>>>> +
> >>>>>     static int
> >>>>>     rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split
> >> *rx_seg,
> >>>>>     			     uint16_t n_seg, uint32_t *mbp_buf_size,
> diff --git
> >>>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> >>>>> 2e783536c1..daf7f05d62 100644
> >>>>> --- a/lib/ethdev/rte_ethdev.h
> >>>>> +++ b/lib/ethdev/rte_ethdev.h
> >>>>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
> >>>>>     	uint8_t queue_state;        /**< one of
> RTE_ETH_QUEUE_STATE_*. */
> >>>>>     } __rte_cache_min_aligned;
> >>>>>
> >>>>> +/**
> >>>>> + * @internal
> >>>>> + * Structure used to hold pointers to internal ethdev Tx data.
> >>>>> + * The main purpose is to load and store Tx queue data in direct
> >>>>> +rearm
> >>>> mode.
> >>>>> + */
> >>>>> +struct rte_eth_txq_data {
> >>>>> +	uint64_t *offloads;
> >>>>> +	void *tx_sw_ring;
> >>>>> +	volatile void *tx_ring;
> >>>>> +	uint16_t *tx_next_dd;
> >>>>> +	uint16_t *nb_tx_free;
> >>>>> +	uint16_t nb_tx_desc;
> >>>>> +	uint16_t tx_rs_thresh;
> >>>>> +	uint16_t tx_free_thresh;
> >>>>> +} __rte_cache_min_aligned;
> >>>>> +
> >>>>
> >>>> first of all it is not clear why this struct has to be in public
> >>>> header, why it can't be in on of ethdev 'private' headers.
> >>>> Second it looks like a snippet from private txq fields for some
> >>>> Intel (and alike) PMDs (i40e, ice, etc.).
> >>>> How it supposed to to be universal and be applicable for any PMD
> >>>> that decides to implement this new API?
> >>>>
> >>>>
> >>>>>     /* Generic Burst mode flag definition, values can be ORed. */
> >>>>>
> >>>>>     /**
> >>>>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
> >>>> port_id, uint16_t queue_id,
> >>>>>     int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t
> queue_id,
> >>>>>     		const struct rte_eth_rxtx_callback *user_cb);
> >>>>>
> >>>>> +/**
> >>>>> + * Get the address which Tx data is stored.
> >>>>> + *
> >>>>> + * @param port_id
> >>>>> + *   The port identifier of the Ethernet device.
> >>>>> + * @param queue_id
> >>>>> + *   The Tx queue on the Ethernet device for which information
> >>>>> + *   will be retrieved.
> >>>>> + * @param txq_data
> >>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> >>>>> + *
> >>>>> + * @return
> >>>>> + *   - 0: Success
> >>>>> + *   - -ENODEV:  If *port_id* is invalid.
> >>>>> + *   - -ENOTSUP: routine is not supported by the device PMD.
> >>>>> + *   - -EINVAL:  The queue_id is out of range.
> >>>>> + */
> >>>>> +__rte_experimental
> >>>>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t
> queue_id,
> >>>>> +		struct rte_eth_txq_data *txq_data);
> >>>>> +
> >>>>>     /**
> >>>>>      * Retrieve information about given port's Rx queue.
> >>>>>      *
> >>>>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id,
> >>>>> uint16_t
> >>>> queue_id,
> >>>>>     	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
> >>>>>     }
> >>>>>
> >>>>> +/**
> >>>>> + * @warning
> >>>>> + * @b EXPERIMENTAL: this API may change, or be removed, without
> >>>>> +prior notice
> >>>>> + *
> >>>>> + * Put Tx buffers into Rx sw-ring and rearm descs.
> >>>>> + *
> >>>>> + * @param port_id
> >>>>> + *   Port identifying the receive side.
> >>>>> + * @param queue_id
> >>>>> + *   The index of the transmit queue identifying the receive side.
> >>>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
> >>>> supplied
> >>>>> + *   to rte_eth_dev_configure().
> >>>>> + * @param txq_data
> >>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
> >>>>> + * @return
> >>>>> + *   The number of direct-rearmed buffers.
> >>>>> + */
> >>>>> +__rte_experimental
> >>>>> +static __rte_always_inline uint16_t
> >>>>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
> >>>>> +		struct rte_eth_txq_data *txq_data) {
> >>>>> +	uint16_t nb_rearm;
> >>>>> +	struct rte_eth_fp_ops *p;
> >>>>> +	void *qd;
> >>>>> +
> >>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>>>> +	if (port_id >= RTE_MAX_ETHPORTS ||
> >>>>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT)
> {
> >>>>> +		RTE_ETHDEV_LOG(ERR,
> >>>>> +			"Invalid port_id=%u or queue_id=%u\n",
> >>>>> +			port_id, queue_id);
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +#endif
> >>>>> +
> >>>>> +	p = &rte_eth_fp_ops[port_id];
> >>>>> +	qd = p->rxq.data[queue_id];
> >>>>> +
> >>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
> >>>>> +
> >>>>> +	if (qd == NULL) {
> >>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> >>>> port_id=%u\n",
> >>>>> +			queue_id, port_id);
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +
> >>>>> +	if (!p->rx_direct_rearm)
> >>>>
> >>>> This check should be done always (unconditionally).
> >>>> it is not a mandatory function for the driver (it can safely skip
> >>>> to
> >> implement it).
> >>>>
> >>>>> +		return -ENOTSUP;
> >>>>
> >>>> This function returns uint16_t, why signed integers here?
> >>>>
> >>>>
> >>>>> +#endif
> >>>>> +
> >>>>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
> >>>>
> >>>> So rx_direct_rearm() function knows how to extract data from TX
> queue?
> >>>> As I understand that is possible only in one case:
> >>>> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
> >>>> That means that rxq and txq have to belong to the same driver and
> >>>> device type.
> >>>>
> >>> Thanks for the comments, and I have some questions for this.
> >>>
> >>>> First of all, I still think it is not the best design choice.
> >>>> If we going ahead with introducing this feature, it better be as
> >>>> generic as possible.
> >>>> Plus it mixes TX and RX code-paths together, while it would be much
> >>>> better to to keep them independent as they are right now.
> >>>>
> >>>> Another thing with such approach - even for the same PMD, both TXQ
> >>>> and RXQ can have different internal data format and behavior logic
> >>>> (depending on port/queue configuration).
> >>> 1. Here TXQ and RXQ have different internal format means the queue
> >>> type and  descs can be different, right? If I understand correctly,
> >>> based on your first strategy, is it means we will need different
> >>> 'rearm_func' for different queue type in the same PMD?
> >>
> >> Yes, I think so.
> >> If let say we have some PMD where depending on the config, there
> >> cpuld be
> >> 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq
> >> formats: txq_c, txq_d.
> >> Then assuming PMD would like to support direct-rearm mode for all
> >> four combinations, it needs 4 different rearm functions:
> >>
> >> rearm_txq_c_to_rxq_a()
> >> rearm_txq_c_to_rxq_b()
> >> rearm_txq_d_to_rxq_a()
> >> rearm_txq_d_to_rxq_b()
> >>
> > Thank you for your detailed explanation, I can understand this.

> >>
> >>>
> >>>> So rx_direct_rearm() function selection have to be done based on
> >>>> both RXQ and TXQ config.
> >>>> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
> >>>> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
> >>>> rx_queue, tx_port, tx_queue);
> >>>> Then, it will be user responsibility to store it somewhere and call
> >> periodically:
> >>>>
> >>>> control_path:
> >>>> 	...
> >>>> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
> >>>> 		 txport, txqueue);
> >>>> data-path:
> >>>> 	while(...) {
> >>>> 		rearm_func(rxport, txport, rxqueue, txqueue);
> >>>> 		rte_eth_rx_burst(rxport, rxqueue, ....);
> >>>> 		rte_eth_tx_burst(txport, txqueue, ....);
> >>>> 	}
> >>>>
> >>>>
> >>>> In that case there seems absolutely no point to introduce struct
> >>>> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
> >>>> directly anyway.
> >>> 2. This is a very good proposal and it will be our first choice.
> >>> Before working on it, I have a few questions about how to implement
> >> 'rearm_func'.
> >>> Like you say above, mixed Rx and Tx path code in 'rearm_func' means
> >>> the hard-code is mixed like:
> >>> rearm_func(...) {
> >>>        ...
> >>>       txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
> >>>       for (...) {
> >>>          rxep[i].mbuf = txep[i].mbuf;
> >>>          mb0 = txep[i].mbuf;
> >>>          paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
> >>>         dma_addr0 = vdupq_n_u64(paddr);
> >>>         vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
> >>>       }
> >>> }
> >>> Is my understanding is right?
> >>
> >>
> >> Sorry, I don't understand the question.
> >> Can you probably elaborate a bit?
> >
> > Sorry for my unclear expression.
> >
> > I mean if we need two func which contains tx and rx paths code
> respectively in rearm_func, like:
> > rearm_func(...) {
> >           rte_tx_fill_sw_ring;
> >           rte_rx_rearm_descs;
> > }
> >
> > Or just mixed tx and rx path code like I said before. I prefer 'rx and
> > tx hard code mixed', because from the performance perspective, this can
> reduce the cost of function calls.
> 
> I suppose it depends on what we choose:
> If we restrict it in a way that rxq and txq have to belong to the same PMD,
> then I suppose this decision could be left to each particular PMD.
> If we'd like to allow rearm to work accross different PMDs (i.e. it would allow
> to rearm mlx with ice and visa-versa), then yes we need PMDs somehow to
> expose rx_sw_ring abstraction to each other.
> My preference would be the second one - as it will make this feature more
> flexible and would help to adopt it more widely.
> Though the first one is probably easier to implement, and as I udnerstand
> you are leaning towards the first one.
> 
> Konstantin

Hi, Konstantin

1. After further consideration, I think we should give up  'rte_eth_get_rx_direct_rearm'  functions.
 And just keep one API 'rte_eth_direct_rearm' which contains different queue types path in the pmd driver.

This is due to that application can dynamically configure queue-mapping in the data path,
and thus expand direct-rearm usage scenarios based on this way. For example, consider a flow
which received by one rx_queue(rxq_1) and sent by two different types tx_queues( type_a txq_1 and type_b txq_2), 
Users can dynamically map direct_rearm according to tx path.
--------------------------------------------------------------------------------------
rte_eth_rx_burst(rxq_1);

%routing table lookup to decide tx queue%
txq_n = txq_1 or txq_2?

rte_eth_tx_burst(txq_n);
rte_eth_direct_rearm(txq_n);
--------------------------------------------------------------------------------------
Thus, this can avoid repeatedly calling 'rte_eth_get_rx_direct_rearm' in the data path to reduce performance.

Furthermore, rte_eth_direct_rearm can be implemented as:
--------------------------------------------------------------------------------------
rte_eth_direct_rearm = i40e_eth_direct_rearm;
i40e_eth_direct_rearm {
	rearm_queue_config_a path;
	rearm_queue_config_b path;
	rearm_queue_config_c path;
	rearm_queue_config_d path;
}
--------------------------------------------------------------------------------------
This implementation refers to 'rte_eth_rx_burst' and 'rte_eth_tx_burst'. If there will be different
queue types, the implementation will be in pmd layer.

2. I'm not sure whether cross pmd usage is realistic. 
Consider the tradeoff between performance and different pmd map, maybe we can provide two
different direct rearm mode for users. One is mixed Rx and Tx path and is used for the same pmd.
The other is separate Rx and Tx path, and is used for different pmd. 

Best Regards
Feifei
> 
> 
> >>
> >>>
> >>>>
> >>>> Another way - make rte_eth_txq_data totally opaque and allow PMD
> to
> >>>> store there some data that will help it to distinguish expected TXQ
> format.
> >>>> That will allow PMD to keep rx_direct_rearm() the same for all
> >>>> supported TXQ formats (it will make decision internally based on
> >>>> data stored in txq_data).
> >>>> Though in that case you'll probably need one more dev-op to free
> >> txq_data.
> >>>
> >
  
Konstantin Ananyev Oct. 31, 2022, 4:36 p.m. UTC | #7
Hi Feifei,


>>>>>>> Add API for enabling direct rearm mode and for mapping RX and TX
>>>>>>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
>>>>>>>
>>>>>>> Furthermore, to avoid Rx load Tx data directly, add API called
>>>>>>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
>>>>>>>
>>>>>>> Suggested-by: Honnappa Nagarahalli
>> <honnappa.nagarahalli@arm.com>
>>>>>>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
>>>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>>>> Reviewed-by: Honnappa Nagarahalli
>> <honnappa.nagarahalli@arm.com>
>>>>>>> ---
>>>>>>>      lib/ethdev/ethdev_driver.h   |  9 ++++
>>>>>>>      lib/ethdev/ethdev_private.c  |  1 +
>>>>>>>      lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
>>>>>>>      lib/ethdev/rte_ethdev.h      | 95
>>>>>> ++++++++++++++++++++++++++++++++++++
>>>>>>>      lib/ethdev/rte_ethdev_core.h |  5 ++
>>>>>>>      lib/ethdev/version.map       |  4 ++
>>>>>>>      6 files changed, 151 insertions(+)
>>>>>>>
>>>>>>> diff --git a/lib/ethdev/ethdev_driver.h
>>>>>>> b/lib/ethdev/ethdev_driver.h index 47a55a419e..14f52907c1 100644
>>>>>>> --- a/lib/ethdev/ethdev_driver.h
>>>>>>> +++ b/lib/ethdev/ethdev_driver.h
>>>>>>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
>>>>>>>      	eth_rx_descriptor_status_t rx_descriptor_status;
>>>>>>>      	/** Check the status of a Tx descriptor */
>>>>>>>      	eth_tx_descriptor_status_t tx_descriptor_status;
>>>>>>> +	/**  Use Tx mbufs for Rx to rearm */
>>>>>>> +	eth_rx_direct_rearm_t rx_direct_rearm;
>>>>>>>
>>>>>>>      	/**
>>>>>>>      	 * Device data that is shared between primary and secondary
>>>>>>> processes @@ -486,6 +488,11 @@ typedef int
>>>>>> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>>>>>>>      typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
>>>>>>>      				    uint16_t rx_queue_id);
>>>>>>>
>>>>>>> +/**< @internal Get Tx information of a transmit queue of an
>>>>>>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct
>>>> rte_eth_dev *dev,
>>>>>>> +				      uint16_t tx_queue_id,
>>>>>>> +				      struct rte_eth_txq_data
>> *txq_data);
>>>>>>> +
>>>>>>>      /** @internal Release memory resources allocated by given
>>>>>>> Rx/Tx
>>>> queue.
>>>>>> */
>>>>>>>      typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
>>>>>>>      				    uint16_t queue_id);
>>>>>>> @@ -1138,6 +1145,8 @@ struct eth_dev_ops {
>>>>>>>      	eth_rxq_info_get_t         rxq_info_get;
>>>>>>>      	/** Retrieve Tx queue information */
>>>>>>>      	eth_txq_info_get_t         txq_info_get;
>>>>>>> +	/** Get the address where Tx data is stored */
>>>>>>> +	eth_txq_data_get_t         txq_data_get;
>>>>>>>      	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx
>> burst
>>>>>> mode */
>>>>>>>      	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx
>> burst
>>>>>> mode */
>>>>>>>      	eth_fw_version_get_t       fw_version_get; /**< Get
>> firmware
>>>>>> version */
>>>>>>> diff --git a/lib/ethdev/ethdev_private.c
>>>>>>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77 100644
>>>>>>> --- a/lib/ethdev/ethdev_private.c
>>>>>>> +++ b/lib/ethdev/ethdev_private.c
>>>>>>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct
>> rte_eth_fp_ops
>>>>>> *fpo,
>>>>>>>      	fpo->rx_queue_count = dev->rx_queue_count;
>>>>>>>      	fpo->rx_descriptor_status = dev->rx_descriptor_status;
>>>>>>>      	fpo->tx_descriptor_status = dev->tx_descriptor_status;
>>>>>>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
>>>>>>>
>>>>>>>      	fpo->rxq.data = dev->data->rx_queues;
>>>>>>>      	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
>>>>>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
>>>>>>> index 0c2c1088c0..0dccec2e4b 100644
>>>>>>> --- a/lib/ethdev/rte_ethdev.c
>>>>>>> +++ b/lib/ethdev/rte_ethdev.c
>>>>>>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t port_id)
>>>>>>>      	return ret;
>>>>>>>      }
>>>>>>>
>>>>>>> +int
>>>>>>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
>>>>>>> +			struct rte_eth_txq_data *txq_data) {
>>>>>>> +	struct rte_eth_dev *dev;
>>>>>>> +
>>>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>>>>>> +	dev = &rte_eth_devices[port_id];
>>>>>>> +
>>>>>>> +	if (queue_id >= dev->data->nb_tx_queues) {
>>>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
>>>>>> queue_id);
>>>>>>> +		return -EINVAL;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (txq_data == NULL) {
>>>>>>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u
>> Tx
>>>>>> queue %u data to NULL\n",
>>>>>>> +			port_id, queue_id);
>>>>>>> +		return -EINVAL;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (dev->data->tx_queues == NULL ||
>>>>>>> +			dev->data->tx_queues[queue_id] == NULL) {
>>>>>>> +		RTE_ETHDEV_LOG(ERR,
>>>>>>> +			   "Tx queue %"PRIu16" of device with
>> port_id=%"
>>>>>>> +			   PRIu16" has not been setup\n",
>>>>>>> +			   queue_id, port_id);
>>>>>>> +		return -EINVAL;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (*dev->dev_ops->txq_data_get == NULL)
>>>>>>> +		return -ENOTSUP;
>>>>>>> +
>>>>>>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
>>>>>>> +
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>      static int
>>>>>>>      rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split
>>>> *rx_seg,
>>>>>>>      			     uint16_t n_seg, uint32_t *mbp_buf_size,
>> diff --git
>>>>>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
>>>>>>> 2e783536c1..daf7f05d62 100644
>>>>>>> --- a/lib/ethdev/rte_ethdev.h
>>>>>>> +++ b/lib/ethdev/rte_ethdev.h
>>>>>>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
>>>>>>>      	uint8_t queue_state;        /**< one of
>> RTE_ETH_QUEUE_STATE_*. */
>>>>>>>      } __rte_cache_min_aligned;
>>>>>>>
>>>>>>> +/**
>>>>>>> + * @internal
>>>>>>> + * Structure used to hold pointers to internal ethdev Tx data.
>>>>>>> + * The main purpose is to load and store Tx queue data in direct
>>>>>>> +rearm
>>>>>> mode.
>>>>>>> + */
>>>>>>> +struct rte_eth_txq_data {
>>>>>>> +	uint64_t *offloads;
>>>>>>> +	void *tx_sw_ring;
>>>>>>> +	volatile void *tx_ring;
>>>>>>> +	uint16_t *tx_next_dd;
>>>>>>> +	uint16_t *nb_tx_free;
>>>>>>> +	uint16_t nb_tx_desc;
>>>>>>> +	uint16_t tx_rs_thresh;
>>>>>>> +	uint16_t tx_free_thresh;
>>>>>>> +} __rte_cache_min_aligned;
>>>>>>> +
>>>>>>
>>>>>> first of all it is not clear why this struct has to be in public
>>>>>> header, why it can't be in on of ethdev 'private' headers.
>>>>>> Second it looks like a snippet from private txq fields for some
>>>>>> Intel (and alike) PMDs (i40e, ice, etc.).
>>>>>> How it supposed to to be universal and be applicable for any PMD
>>>>>> that decides to implement this new API?
>>>>>>
>>>>>>
>>>>>>>      /* Generic Burst mode flag definition, values can be ORed. */
>>>>>>>
>>>>>>>      /**
>>>>>>> @@ -4718,6 +4735,27 @@ int rte_eth_remove_rx_callback(uint16_t
>>>>>> port_id, uint16_t queue_id,
>>>>>>>      int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t
>> queue_id,
>>>>>>>      		const struct rte_eth_rxtx_callback *user_cb);
>>>>>>>
>>>>>>> +/**
>>>>>>> + * Get the address which Tx data is stored.
>>>>>>> + *
>>>>>>> + * @param port_id
>>>>>>> + *   The port identifier of the Ethernet device.
>>>>>>> + * @param queue_id
>>>>>>> + *   The Tx queue on the Ethernet device for which information
>>>>>>> + *   will be retrieved.
>>>>>>> + * @param txq_data
>>>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>>>>>> + *
>>>>>>> + * @return
>>>>>>> + *   - 0: Success
>>>>>>> + *   - -ENODEV:  If *port_id* is invalid.
>>>>>>> + *   - -ENOTSUP: routine is not supported by the device PMD.
>>>>>>> + *   - -EINVAL:  The queue_id is out of range.
>>>>>>> + */
>>>>>>> +__rte_experimental
>>>>>>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t
>> queue_id,
>>>>>>> +		struct rte_eth_txq_data *txq_data);
>>>>>>> +
>>>>>>>      /**
>>>>>>>       * Retrieve information about given port's Rx queue.
>>>>>>>       *
>>>>>>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id,
>>>>>>> uint16_t
>>>>>> queue_id,
>>>>>>>      	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>>>>>>>      }
>>>>>>>
>>>>>>> +/**
>>>>>>> + * @warning
>>>>>>> + * @b EXPERIMENTAL: this API may change, or be removed, without
>>>>>>> +prior notice
>>>>>>> + *
>>>>>>> + * Put Tx buffers into Rx sw-ring and rearm descs.
>>>>>>> + *
>>>>>>> + * @param port_id
>>>>>>> + *   Port identifying the receive side.
>>>>>>> + * @param queue_id
>>>>>>> + *   The index of the transmit queue identifying the receive side.
>>>>>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
>>>>>> supplied
>>>>>>> + *   to rte_eth_dev_configure().
>>>>>>> + * @param txq_data
>>>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
>>>>>>> + * @return
>>>>>>> + *   The number of direct-rearmed buffers.
>>>>>>> + */
>>>>>>> +__rte_experimental
>>>>>>> +static __rte_always_inline uint16_t
>>>>>>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
>>>>>>> +		struct rte_eth_txq_data *txq_data) {
>>>>>>> +	uint16_t nb_rearm;
>>>>>>> +	struct rte_eth_fp_ops *p;
>>>>>>> +	void *qd;
>>>>>>> +
>>>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>>>>>> +	if (port_id >= RTE_MAX_ETHPORTS ||
>>>>>>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT)
>> {
>>>>>>> +		RTE_ETHDEV_LOG(ERR,
>>>>>>> +			"Invalid port_id=%u or queue_id=%u\n",
>>>>>>> +			port_id, queue_id);
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +#endif
>>>>>>> +
>>>>>>> +	p = &rte_eth_fp_ops[port_id];
>>>>>>> +	qd = p->rxq.data[queue_id];
>>>>>>> +
>>>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
>>>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
>>>>>>> +
>>>>>>> +	if (qd == NULL) {
>>>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
>>>>>> port_id=%u\n",
>>>>>>> +			queue_id, port_id);
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (!p->rx_direct_rearm)
>>>>>>
>>>>>> This check should be done always (unconditionally).
>>>>>> it is not a mandatory function for the driver (it can safely skip
>>>>>> to
>>>> implement it).
>>>>>>
>>>>>>> +		return -ENOTSUP;
>>>>>>
>>>>>> This function returns uint16_t, why signed integers here?
>>>>>>
>>>>>>
>>>>>>> +#endif
>>>>>>> +
>>>>>>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
>>>>>>
>>>>>> So rx_direct_rearm() function knows how to extract data from TX
>> queue?
>>>>>> As I understand that is possible only in one case:
>>>>>> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
>>>>>> That means that rxq and txq have to belong to the same driver and
>>>>>> device type.
>>>>>>
>>>>> Thanks for the comments, and I have some questions for this.
>>>>>
>>>>>> First of all, I still think it is not the best design choice.
>>>>>> If we going ahead with introducing this feature, it better be as
>>>>>> generic as possible.
>>>>>> Plus it mixes TX and RX code-paths together, while it would be much
>>>>>> better to to keep them independent as they are right now.
>>>>>>
>>>>>> Another thing with such approach - even for the same PMD, both TXQ
>>>>>> and RXQ can have different internal data format and behavior logic
>>>>>> (depending on port/queue configuration).
>>>>> 1. Here TXQ and RXQ have different internal format means the queue
>>>>> type and  descs can be different, right? If I understand correctly,
>>>>> based on your first strategy, is it means we will need different
>>>>> 'rearm_func' for different queue type in the same PMD?
>>>>
>>>> Yes, I think so.
>>>> If let say we have some PMD where depending on the config, there
>>>> cpuld be
>>>> 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq
>>>> formats: txq_c, txq_d.
>>>> Then assuming PMD would like to support direct-rearm mode for all
>>>> four combinations, it needs 4 different rearm functions:
>>>>
>>>> rearm_txq_c_to_rxq_a()
>>>> rearm_txq_c_to_rxq_b()
>>>> rearm_txq_d_to_rxq_a()
>>>> rearm_txq_d_to_rxq_b()
>>>>
>>> Thank you for your detailed explanation, I can understand this.
> 
>>>>
>>>>>
>>>>>> So rx_direct_rearm() function selection have to be done based on
>>>>>> both RXQ and TXQ config.
>>>>>> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
>>>>>> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
>>>>>> rx_queue, tx_port, tx_queue);
>>>>>> Then, it will be user responsibility to store it somewhere and call
>>>> periodically:
>>>>>>
>>>>>> control_path:
>>>>>> 	...
>>>>>> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport, rxqueue,
>>>>>> 		 txport, txqueue);
>>>>>> data-path:
>>>>>> 	while(...) {
>>>>>> 		rearm_func(rxport, txport, rxqueue, txqueue);
>>>>>> 		rte_eth_rx_burst(rxport, rxqueue, ....);
>>>>>> 		rte_eth_tx_burst(txport, txqueue, ....);
>>>>>> 	}
>>>>>>
>>>>>>
>>>>>> In that case there seems absolutely no point to introduce struct
>>>>>> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
>>>>>> directly anyway.
>>>>> 2. This is a very good proposal and it will be our first choice.
>>>>> Before working on it, I have a few questions about how to implement
>>>> 'rearm_func'.
>>>>> Like you say above, mixed Rx and Tx path code in 'rearm_func' means
>>>>> the hard-code is mixed like:
>>>>> rearm_func(...) {
>>>>>         ...
>>>>>        txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
>>>>>        for (...) {
>>>>>           rxep[i].mbuf = txep[i].mbuf;
>>>>>           mb0 = txep[i].mbuf;
>>>>>           paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
>>>>>          dma_addr0 = vdupq_n_u64(paddr);
>>>>>          vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
>>>>>        }
>>>>> }
>>>>> Is my understanding is right?
>>>>
>>>>
>>>> Sorry, I don't understand the question.
>>>> Can you probably elaborate a bit?
>>>
>>> Sorry for my unclear expression.
>>>
>>> I mean if we need two func which contains tx and rx paths code
>> respectively in rearm_func, like:
>>> rearm_func(...) {
>>>            rte_tx_fill_sw_ring;
>>>            rte_rx_rearm_descs;
>>> }
>>>
>>> Or just mixed tx and rx path code like I said before. I prefer 'rx and
>>> tx hard code mixed', because from the performance perspective, this can
>> reduce the cost of function calls.
>>
>> I suppose it depends on what we choose:
>> If we restrict it in a way that rxq and txq have to belong to the same PMD,
>> then I suppose this decision could be left to each particular PMD.
>> If we'd like to allow rearm to work accross different PMDs (i.e. it would allow
>> to rearm mlx with ice and visa-versa), then yes we need PMDs somehow to
>> expose rx_sw_ring abstraction to each other.
>> My preference would be the second one - as it will make this feature more
>> flexible and would help to adopt it more widely.
>> Though the first one is probably easier to implement, and as I udnerstand
>> you are leaning towards the first one.
>>
>> Konstantin
> 
> Hi, Konstantin
> 
> 1. After further consideration, I think we should give up  'rte_eth_get_rx_direct_rearm'  functions.
>   And just keep one API 'rte_eth_direct_rearm' which contains different queue types path in the pmd driver.
> 
> This is due to that application can dynamically configure queue-mapping in the data path,
> and thus expand direct-rearm usage scenarios based on this way. For example, consider a flow
> which received by one rx_queue(rxq_1) and sent by two different types tx_queues( type_a txq_1 and type_b txq_2),
> Users can dynamically map direct_rearm according to tx path.

That's good point, indeed for such case special function pointer doesn't 
look very conveniet.
I still think that the best way to deal with multiple sources would be
to expose some sort of rx_sw_ring abstraction.
Then it could be obtaiend once and passed as a paramter to actual rearm 
function.
In that case, user can even re-arm RX queue not from TX queue only,
but from other sources too (freed mbufs, etc.).
As I understand what you suggest is one unified function:
int rte_eth_rx_direct_rearm(rx_port, rx_queue, tx_port, tx_queue), correct?
I am not against it in principle, but does it mean at each invocation,
inside PMD layer, first thing you would have to do -
check that rx_queue and tx_queue are from the same device type and 
select proper behaviour (for different queue configs)?

> --------------------------------------------------------------------------------------
> rte_eth_rx_burst(rxq_1);
> 
> %routing table lookup to decide tx queue%
> txq_n = txq_1 or txq_2?
> 
> rte_eth_tx_burst(txq_n);
> rte_eth_direct_rearm(txq_n);
> --------------------------------------------------------------------------------------
> Thus, this can avoid repeatedly calling 'rte_eth_get_rx_direct_rearm' in the data path to reduce performance.
> 
> Furthermore, rte_eth_direct_rearm can be implemented as:
> --------------------------------------------------------------------------------------
> rte_eth_direct_rearm = i40e_eth_direct_rearm;
> i40e_eth_direct_rearm {
> 	rearm_queue_config_a path;
> 	rearm_queue_config_b path;
> 	rearm_queue_config_c path;
> 	rearm_queue_config_d path;
> }
> --------------------------------------------------------------------------------------
> This implementation refers to 'rte_eth_rx_burst' and 'rte_eth_tx_burst'. If there will be different
> queue types, the implementation will be in pmd layer.
> 
> 2. I'm not sure whether cross pmd usage is realistic.
> Consider the tradeoff between performance and different pmd map, maybe we can provide two
> different direct rearm mode for users. One is mixed Rx and Tx path and is used for the same pmd.
> The other is separate Rx and Tx path, and is used for different pmd.
> Best Regards
> Feifei
>>
>>
>>>>
>>>>>
>>>>>>
>>>>>> Another way - make rte_eth_txq_data totally opaque and allow PMD
>> to
>>>>>> store there some data that will help it to distinguish expected TXQ
>> format.
>>>>>> That will allow PMD to keep rx_direct_rearm() the same for all
>>>>>> supported TXQ formats (it will make decision internally based on
>>>>>> data stored in txq_data).
>>>>>> Though in that case you'll probably need one more dev-op to free
>>>> txq_data.
>>>>>
>>>
>
  
Feifei Wang Jan. 4, 2023, 7:39 a.m. UTC | #8
Hi, Konstantin

> -----邮件原件-----
> 发件人: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> 发送时间: Tuesday, November 1, 2022 12:37 AM
> 收件人: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net;
> Ferruh Yigit <ferruh.yigit@xilinx.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Ray Kinsella <mdr@ashroe.eu>
> 抄送: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> 主题: Re: 回复: 回复: 回复: [PATCH v2 1/3] ethdev: add API for direct rearm
> mode
> 
> 
> 
> Hi Feifei,
> 
> 
> >>>>>>> Add API for enabling direct rearm mode and for mapping RX and TX
> >>>>>>> queues. Currently, the API supports 1:1(txq : rxq) mapping.
> >>>>>>>
> >>>>>>> Furthermore, to avoid Rx load Tx data directly, add API called
> >>>>>>> 'rte_eth_txq_data_get' to get Tx sw_ring and its information.
> >>>>>>>
> >>>>>>> Suggested-by: Honnappa Nagarahalli
> >> <honnappa.nagarahalli@arm.com>
> >>>>>>> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>>>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> >>>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>>>>> Reviewed-by: Honnappa Nagarahalli
> >> <honnappa.nagarahalli@arm.com>
> >>>>>>> ---
> >>>>>>>      lib/ethdev/ethdev_driver.h   |  9 ++++
> >>>>>>>      lib/ethdev/ethdev_private.c  |  1 +
> >>>>>>>      lib/ethdev/rte_ethdev.c      | 37 ++++++++++++++
> >>>>>>>      lib/ethdev/rte_ethdev.h      | 95
> >>>>>> ++++++++++++++++++++++++++++++++++++
> >>>>>>>      lib/ethdev/rte_ethdev_core.h |  5 ++
> >>>>>>>      lib/ethdev/version.map       |  4 ++
> >>>>>>>      6 files changed, 151 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/lib/ethdev/ethdev_driver.h
> >>>>>>> b/lib/ethdev/ethdev_driver.h index 47a55a419e..14f52907c1
> 100644
> >>>>>>> --- a/lib/ethdev/ethdev_driver.h
> >>>>>>> +++ b/lib/ethdev/ethdev_driver.h
> >>>>>>> @@ -58,6 +58,8 @@ struct rte_eth_dev {
> >>>>>>>      	eth_rx_descriptor_status_t rx_descriptor_status;
> >>>>>>>      	/** Check the status of a Tx descriptor */
> >>>>>>>      	eth_tx_descriptor_status_t tx_descriptor_status;
> >>>>>>> +	/**  Use Tx mbufs for Rx to rearm */
> >>>>>>> +	eth_rx_direct_rearm_t rx_direct_rearm;
> >>>>>>>
> >>>>>>>      	/**
> >>>>>>>      	 * Device data that is shared between primary and
> >>>>>>> secondary processes @@ -486,6 +488,11 @@ typedef int
> >>>>>> (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >>>>>>>      typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
> >>>>>>>      				    uint16_t rx_queue_id);
> >>>>>>>
> >>>>>>> +/**< @internal Get Tx information of a transmit queue of an
> >>>>>>> +Ethernet device. */ typedef void (*eth_txq_data_get_t)(struct
> >>>> rte_eth_dev *dev,
> >>>>>>> +				      uint16_t tx_queue_id,
> >>>>>>> +				      struct rte_eth_txq_data
> >> *txq_data);
> >>>>>>> +
> >>>>>>>      /** @internal Release memory resources allocated by given
> >>>>>>> Rx/Tx
> >>>> queue.
> >>>>>> */
> >>>>>>>      typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
> >>>>>>>      				    uint16_t queue_id); @@ -1138,6
> +1145,8 @@ struct
> >>>>>>> eth_dev_ops {
> >>>>>>>      	eth_rxq_info_get_t         rxq_info_get;
> >>>>>>>      	/** Retrieve Tx queue information */
> >>>>>>>      	eth_txq_info_get_t         txq_info_get;
> >>>>>>> +	/** Get the address where Tx data is stored */
> >>>>>>> +	eth_txq_data_get_t         txq_data_get;
> >>>>>>>      	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx
> >> burst
> >>>>>> mode */
> >>>>>>>      	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx
> >> burst
> >>>>>> mode */
> >>>>>>>      	eth_fw_version_get_t       fw_version_get; /**< Get
> >> firmware
> >>>>>> version */
> >>>>>>> diff --git a/lib/ethdev/ethdev_private.c
> >>>>>>> b/lib/ethdev/ethdev_private.c index 48090c879a..bfe16c7d77
> >>>>>>> 100644
> >>>>>>> --- a/lib/ethdev/ethdev_private.c
> >>>>>>> +++ b/lib/ethdev/ethdev_private.c
> >>>>>>> @@ -276,6 +276,7 @@ eth_dev_fp_ops_setup(struct
> >> rte_eth_fp_ops
> >>>>>> *fpo,
> >>>>>>>      	fpo->rx_queue_count = dev->rx_queue_count;
> >>>>>>>      	fpo->rx_descriptor_status = dev->rx_descriptor_status;
> >>>>>>>      	fpo->tx_descriptor_status = dev->tx_descriptor_status;
> >>>>>>> +	fpo->rx_direct_rearm = dev->rx_direct_rearm;
> >>>>>>>
> >>>>>>>      	fpo->rxq.data = dev->data->rx_queues;
> >>>>>>>      	fpo->rxq.clbk = (void
> >>>>>>> **)(uintptr_t)dev->post_rx_burst_cbs;
> >>>>>>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> >>>>>>> index 0c2c1088c0..0dccec2e4b 100644
> >>>>>>> --- a/lib/ethdev/rte_ethdev.c
> >>>>>>> +++ b/lib/ethdev/rte_ethdev.c
> >>>>>>> @@ -1648,6 +1648,43 @@ rte_eth_dev_is_removed(uint16_t
> port_id)
> >>>>>>>      	return ret;
> >>>>>>>      }
> >>>>>>>
> >>>>>>> +int
> >>>>>>> +rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
> >>>>>>> +			struct rte_eth_txq_data *txq_data) {
> >>>>>>> +	struct rte_eth_dev *dev;
> >>>>>>> +
> >>>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> >>>>>>> +	dev = &rte_eth_devices[port_id];
> >>>>>>> +
> >>>>>>> +	if (queue_id >= dev->data->nb_tx_queues) {
> >>>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n",
> >>>>>> queue_id);
> >>>>>>> +		return -EINVAL;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>> +	if (txq_data == NULL) {
> >>>>>>> +		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u
> >> Tx
> >>>>>> queue %u data to NULL\n",
> >>>>>>> +			port_id, queue_id);
> >>>>>>> +		return -EINVAL;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>> +	if (dev->data->tx_queues == NULL ||
> >>>>>>> +			dev->data->tx_queues[queue_id] == NULL) {
> >>>>>>> +		RTE_ETHDEV_LOG(ERR,
> >>>>>>> +			   "Tx queue %"PRIu16" of device with
> >> port_id=%"
> >>>>>>> +			   PRIu16" has not been setup\n",
> >>>>>>> +			   queue_id, port_id);
> >>>>>>> +		return -EINVAL;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>> +	if (*dev->dev_ops->txq_data_get == NULL)
> >>>>>>> +		return -ENOTSUP;
> >>>>>>> +
> >>>>>>> +	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
> >>>>>>> +
> >>>>>>> +	return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>      static int
> >>>>>>>      rte_eth_rx_queue_check_split(const struct
> >>>>>>> rte_eth_rxseg_split
> >>>> *rx_seg,
> >>>>>>>      			     uint16_t n_seg, uint32_t *mbp_buf_size,
> >> diff --git
> >>>>>>> a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> >>>>>>> 2e783536c1..daf7f05d62 100644
> >>>>>>> --- a/lib/ethdev/rte_ethdev.h
> >>>>>>> +++ b/lib/ethdev/rte_ethdev.h
> >>>>>>> @@ -1949,6 +1949,23 @@ struct rte_eth_txq_info {
> >>>>>>>      	uint8_t queue_state;        /**< one of
> >> RTE_ETH_QUEUE_STATE_*. */
> >>>>>>>      } __rte_cache_min_aligned;
> >>>>>>>
> >>>>>>> +/**
> >>>>>>> + * @internal
> >>>>>>> + * Structure used to hold pointers to internal ethdev Tx data.
> >>>>>>> + * The main purpose is to load and store Tx queue data in
> >>>>>>> +direct rearm
> >>>>>> mode.
> >>>>>>> + */
> >>>>>>> +struct rte_eth_txq_data {
> >>>>>>> +	uint64_t *offloads;
> >>>>>>> +	void *tx_sw_ring;
> >>>>>>> +	volatile void *tx_ring;
> >>>>>>> +	uint16_t *tx_next_dd;
> >>>>>>> +	uint16_t *nb_tx_free;
> >>>>>>> +	uint16_t nb_tx_desc;
> >>>>>>> +	uint16_t tx_rs_thresh;
> >>>>>>> +	uint16_t tx_free_thresh;
> >>>>>>> +} __rte_cache_min_aligned;
> >>>>>>> +
> >>>>>>
> >>>>>> first of all it is not clear why this struct has to be in public
> >>>>>> header, why it can't be in on of ethdev 'private' headers.
> >>>>>> Second it looks like a snippet from private txq fields for some
> >>>>>> Intel (and alike) PMDs (i40e, ice, etc.).
> >>>>>> How it supposed to to be universal and be applicable for any PMD
> >>>>>> that decides to implement this new API?
> >>>>>>
> >>>>>>
> >>>>>>>      /* Generic Burst mode flag definition, values can be ORed.
> >>>>>>> */
> >>>>>>>
> >>>>>>>      /**
> >>>>>>> @@ -4718,6 +4735,27 @@ int
> rte_eth_remove_rx_callback(uint16_t
> >>>>>> port_id, uint16_t queue_id,
> >>>>>>>      int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t
> >> queue_id,
> >>>>>>>      		const struct rte_eth_rxtx_callback *user_cb);
> >>>>>>>
> >>>>>>> +/**
> >>>>>>> + * Get the address which Tx data is stored.
> >>>>>>> + *
> >>>>>>> + * @param port_id
> >>>>>>> + *   The port identifier of the Ethernet device.
> >>>>>>> + * @param queue_id
> >>>>>>> + *   The Tx queue on the Ethernet device for which information
> >>>>>>> + *   will be retrieved.
> >>>>>>> + * @param txq_data
> >>>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be
> filled.
> >>>>>>> + *
> >>>>>>> + * @return
> >>>>>>> + *   - 0: Success
> >>>>>>> + *   - -ENODEV:  If *port_id* is invalid.
> >>>>>>> + *   - -ENOTSUP: routine is not supported by the device PMD.
> >>>>>>> + *   - -EINVAL:  The queue_id is out of range.
> >>>>>>> + */
> >>>>>>> +__rte_experimental
> >>>>>>> +int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t
> >> queue_id,
> >>>>>>> +		struct rte_eth_txq_data *txq_data);
> >>>>>>> +
> >>>>>>>      /**
> >>>>>>>       * Retrieve information about given port's Rx queue.
> >>>>>>>       *
> >>>>>>> @@ -6209,6 +6247,63 @@ rte_eth_tx_buffer(uint16_t port_id,
> >>>>>>> uint16_t
> >>>>>> queue_id,
> >>>>>>>      	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
> >>>>>>>      }
> >>>>>>>
> >>>>>>> +/**
> >>>>>>> + * @warning
> >>>>>>> + * @b EXPERIMENTAL: this API may change, or be removed,
> without
> >>>>>>> +prior notice
> >>>>>>> + *
> >>>>>>> + * Put Tx buffers into Rx sw-ring and rearm descs.
> >>>>>>> + *
> >>>>>>> + * @param port_id
> >>>>>>> + *   Port identifying the receive side.
> >>>>>>> + * @param queue_id
> >>>>>>> + *   The index of the transmit queue identifying the receive side.
> >>>>>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
> >>>>>> supplied
> >>>>>>> + *   to rte_eth_dev_configure().
> >>>>>>> + * @param txq_data
> >>>>>>> + *   A pointer to a structure of type *rte_eth_txq_data* to be
> filled.
> >>>>>>> + * @return
> >>>>>>> + *   The number of direct-rearmed buffers.
> >>>>>>> + */
> >>>>>>> +__rte_experimental
> >>>>>>> +static __rte_always_inline uint16_t
> >>>>>>> +rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
> >>>>>>> +		struct rte_eth_txq_data *txq_data) {
> >>>>>>> +	uint16_t nb_rearm;
> >>>>>>> +	struct rte_eth_fp_ops *p;
> >>>>>>> +	void *qd;
> >>>>>>> +
> >>>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>>>>>> +	if (port_id >= RTE_MAX_ETHPORTS ||
> >>>>>>> +			queue_id >= RTE_MAX_QUEUES_PER_PORT)
> >> {
> >>>>>>> +		RTE_ETHDEV_LOG(ERR,
> >>>>>>> +			"Invalid port_id=%u or queue_id=%u\n",
> >>>>>>> +			port_id, queue_id);
> >>>>>>> +		return 0;
> >>>>>>> +	}
> >>>>>>> +#endif
> >>>>>>> +
> >>>>>>> +	p = &rte_eth_fp_ops[port_id];
> >>>>>>> +	qd = p->rxq.data[queue_id];
> >>>>>>> +
> >>>>>>> +#ifdef RTE_ETHDEV_DEBUG_RX
> >>>>>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
> >>>>>>> +
> >>>>>>> +	if (qd == NULL) {
> >>>>>>> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> >>>>>> port_id=%u\n",
> >>>>>>> +			queue_id, port_id);
> >>>>>>> +		return 0;
> >>>>>>> +	}
> >>>>>>> +
> >>>>>>> +	if (!p->rx_direct_rearm)
> >>>>>>
> >>>>>> This check should be done always (unconditionally).
> >>>>>> it is not a mandatory function for the driver (it can safely skip
> >>>>>> to
> >>>> implement it).
> >>>>>>
> >>>>>>> +		return -ENOTSUP;
> >>>>>>
> >>>>>> This function returns uint16_t, why signed integers here?
> >>>>>>
> >>>>>>
> >>>>>>> +#endif
> >>>>>>> +
> >>>>>>> +	nb_rearm = p->rx_direct_rearm(qd, txq_data);
> >>>>>>
> >>>>>> So rx_direct_rearm() function knows how to extract data from TX
> >> queue?
> >>>>>> As I understand that is possible only in one case:
> >>>>>> rx_direct_rearm() has full knowledge and acess of txq internals, etc.
> >>>>>> That means that rxq and txq have to belong to the same driver and
> >>>>>> device type.
> >>>>>>
> >>>>> Thanks for the comments, and I have some questions for this.
> >>>>>
> >>>>>> First of all, I still think it is not the best design choice.
> >>>>>> If we going ahead with introducing this feature, it better be as
> >>>>>> generic as possible.
> >>>>>> Plus it mixes TX and RX code-paths together, while it would be
> >>>>>> much better to to keep them independent as they are right now.
> >>>>>>
> >>>>>> Another thing with such approach - even for the same PMD, both
> >>>>>> TXQ and RXQ can have different internal data format and behavior
> >>>>>> logic (depending on port/queue configuration).
> >>>>> 1. Here TXQ and RXQ have different internal format means the queue
> >>>>> type and  descs can be different, right? If I understand
> >>>>> correctly, based on your first strategy, is it means we will need
> >>>>> different 'rearm_func' for different queue type in the same PMD?
> >>>>
> >>>> Yes, I think so.
> >>>> If let say we have some PMD where depending on the config, there
> >>>> cpuld be
> >>>> 2 different RXQ formats: rxq_a and rxq_b, and 2 different txq
> >>>> formats: txq_c, txq_d.
> >>>> Then assuming PMD would like to support direct-rearm mode for all
> >>>> four combinations, it needs 4 different rearm functions:
> >>>>
> >>>> rearm_txq_c_to_rxq_a()
> >>>> rearm_txq_c_to_rxq_b()
> >>>> rearm_txq_d_to_rxq_a()
> >>>> rearm_txq_d_to_rxq_b()
> >>>>
> >>> Thank you for your detailed explanation, I can understand this.
> >
> >>>>
> >>>>>
> >>>>>> So rx_direct_rearm() function selection have to be done based on
> >>>>>> both RXQ and TXQ config.
> >>>>>> So instead of rte_eth_tx_queue_data_get(), you'll probably need:
> >>>>>> eth_rx_direct_rearm_t rte_eth_get_rx_direct_rearm_func(rx_port,
> >>>>>> rx_queue, tx_port, tx_queue);
> >>>>>> Then, it will be user responsibility to store it somewhere and
> >>>>>> call
> >>>> periodically:
> >>>>>>
> >>>>>> control_path:
> >>>>>> 	...
> >>>>>> 	rearm_func = rte_eth_get_rx_direct_rearm_func(rxport,
> rxqueue,
> >>>>>> 		 txport, txqueue);
> >>>>>> data-path:
> >>>>>> 	while(...) {
> >>>>>> 		rearm_func(rxport, txport, rxqueue, txqueue);
> >>>>>> 		rte_eth_rx_burst(rxport, rxqueue, ....);
> >>>>>> 		rte_eth_tx_burst(txport, txqueue, ....);
> >>>>>> 	}
> >>>>>>
> >>>>>>
> >>>>>> In that case there seems absolutely no point to introduce struct
> >>>>>> rte_eth_txq_data. rx_direct_rearm() accesses TXQ private data
> >>>>>> directly anyway.
> >>>>> 2. This is a very good proposal and it will be our first choice.
> >>>>> Before working on it, I have a few questions about how to
> >>>>> implement
> >>>> 'rearm_func'.
> >>>>> Like you say above, mixed Rx and Tx path code in 'rearm_func'
> >>>>> means the hard-code is mixed like:
> >>>>> rearm_func(...) {
> >>>>>         ...
> >>>>>        txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
> >>>>>        for (...) {
> >>>>>           rxep[i].mbuf = txep[i].mbuf;
> >>>>>           mb0 = txep[i].mbuf;
> >>>>>           paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
> >>>>>          dma_addr0 = vdupq_n_u64(paddr);
> >>>>>          vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
> >>>>>        }
> >>>>> }
> >>>>> Is my understanding is right?
> >>>>
> >>>>
> >>>> Sorry, I don't understand the question.
> >>>> Can you probably elaborate a bit?
> >>>
> >>> Sorry for my unclear expression.
> >>>
> >>> I mean if we need two func which contains tx and rx paths code
> >> respectively in rearm_func, like:
> >>> rearm_func(...) {
> >>>            rte_tx_fill_sw_ring;
> >>>            rte_rx_rearm_descs;
> >>> }
> >>>
> >>> Or just mixed tx and rx path code like I said before. I prefer 'rx
> >>> and tx hard code mixed', because from the performance perspective,
> >>> this can
> >> reduce the cost of function calls.
> >>
> >> I suppose it depends on what we choose:
> >> If we restrict it in a way that rxq and txq have to belong to the
> >> same PMD, then I suppose this decision could be left to each particular
> PMD.
> >> If we'd like to allow rearm to work accross different PMDs (i.e. it
> >> would allow to rearm mlx with ice and visa-versa), then yes we need
> >> PMDs somehow to expose rx_sw_ring abstraction to each other.
> >> My preference would be the second one - as it will make this feature
> >> more flexible and would help to adopt it more widely.
> >> Though the first one is probably easier to implement, and as I
> >> udnerstand you are leaning towards the first one.
> >>
> >> Konstantin
> >
> > Hi, Konstantin
> >
> > 1. After further consideration, I think we should give up
> 'rte_eth_get_rx_direct_rearm'  functions.
> >   And just keep one API 'rte_eth_direct_rearm' which contains different
> queue types path in the pmd driver.
> >
> > This is due to that application can dynamically configure
> > queue-mapping in the data path, and thus expand direct-rearm usage
> > scenarios based on this way. For example, consider a flow which
> > received by one rx_queue(rxq_1) and sent by two different types
> tx_queues( type_a txq_1 and type_b txq_2), Users can dynamically map
> direct_rearm according to tx path.
> 
> That's good point, indeed for such case special function pointer doesn't look
> very conveniet.
> I still think that the best way to deal with multiple sources would be to
> expose some sort of rx_sw_ring abstraction.
> Then it could be obtaiend once and passed as a paramter to actual rearm
> function.
> In that case, user can even re-arm RX queue not from TX queue only, but
> from other sources too (freed mbufs, etc.).
> As I understand what you suggest is one unified function:
> int rte_eth_rx_direct_rearm(rx_port, rx_queue, tx_port, tx_queue), correct?
> I am not against it in principle, but does it mean at each invocation, inside
> PMD layer, first thing you would have to do - check that rx_queue and
> tx_queue are from the same device type and select proper behaviour (for
> different queue configs)?

Sorry for my late reply. We just used direct-rearm to deal with multiple sources by expose
some sort of rx_sw_ring abstraction. (For example, Rx: ixgbe   Tx:i40e):
http://patches.dpdk.org/project/dpdk/cover/20230104073043.1120168-1-feifei.wang2@arm.com/
Following are the  test results:
(1) dpdk l3fwd test with multiple drivers:  port 0: 82599 NIC   port 1: XL710 NIC
-------------------------------------------------------------------------------------------------------
		Without fast free	With fast free
Thunderx2:         +9.44%			+7.14%
-------------------------------------------------------------------------------------------------------

(2) dpdk l3fwd test with same driver: port 0 && 1 XL710 NIC
-------------------------------------------------------------------------------------------------------
*Direct rearm with exposing rx_sw_ring:
		Without fast free	With fast free
Ampere altra:     +14.98%		+15.77%
n1sdp:		 +6.47%			+0.52%

*Direct rearm with one unified function (community patch):
		Without fast free	With fast free
Ampere altra:     +17.18%		+16.92%
n1sdp:		 +15.09%		+4.2%
-------------------------------------------------------------------------------------------------------

(3) VPP test with same driver: port 0 && 1 XL710 NIC
-------------------------------------------------------------------------------------------------------
*Direct rearm with exposing rx_sw_ring:
Ampere altra:     +4.59%
n1sdp:		 +5.4%

*Direct rearm with one unified function (community patch):
Ampere altra:     +4.50%
n1sdp:		 +4.5%
-------------------------------------------------------------------------------------------------------
According to the test results, we can see direct-rearm mode by expose rx_sw_ring can
improve the performance significantly for the same driver and different driver.

Compared with direct-rearm with one unified function in community, there will be some
performance degradation in n1sdp for new direct-rearm mode. This is due to we separate fill sw_ring
and flush descriptors operation which may increase cache-misses. For other servers, performance is Ok.
I think performance degradation in n1sdp can be achieved and new scheme can bring better expansibility.

Best Regards
Feifei
> 
> > ----------------------------------------------------------------------
> > ----------------
> > rte_eth_rx_burst(rxq_1);
> >
> > %routing table lookup to decide tx queue% txq_n = txq_1 or txq_2?
> >
> > rte_eth_tx_burst(txq_n);
> > rte_eth_direct_rearm(txq_n);
> > ----------------------------------------------------------------------
> > ---------------- Thus, this can avoid repeatedly calling
> > 'rte_eth_get_rx_direct_rearm' in the data path to reduce performance.
> >
> > Furthermore, rte_eth_direct_rearm can be implemented as:
> > ----------------------------------------------------------------------
> > ---------------- rte_eth_direct_rearm = i40e_eth_direct_rearm;
> > i40e_eth_direct_rearm {
> > 	rearm_queue_config_a path;
> > 	rearm_queue_config_b path;
> > 	rearm_queue_config_c path;
> > 	rearm_queue_config_d path;
> > }
> > ----------------------------------------------------------------------
> > ---------------- This implementation refers to 'rte_eth_rx_burst' and
> > 'rte_eth_tx_burst'. If there will be different queue types, the
> > implementation will be in pmd layer.
> >
> > 2. I'm not sure whether cross pmd usage is realistic.
> > Consider the tradeoff between performance and different pmd map,
> maybe
> > we can provide two different direct rearm mode for users. One is mixed Rx
> and Tx path and is used for the same pmd.
> > The other is separate Rx and Tx path, and is used for different pmd.
> > Best Regards
> > Feifei
> >>
> >>
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Another way - make rte_eth_txq_data totally opaque and allow
> PMD
> >> to
> >>>>>> store there some data that will help it to distinguish expected
> >>>>>> TXQ
> >> format.
> >>>>>> That will allow PMD to keep rx_direct_rearm() the same for all
> >>>>>> supported TXQ formats (it will make decision internally based on
> >>>>>> data stored in txq_data).
> >>>>>> Though in that case you'll probably need one more dev-op to free
> >>>> txq_data.
> >>>>>
> >>>
> >
  

Patch

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 47a55a419e..14f52907c1 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -58,6 +58,8 @@  struct rte_eth_dev {
 	eth_rx_descriptor_status_t rx_descriptor_status;
 	/** Check the status of a Tx descriptor */
 	eth_tx_descriptor_status_t tx_descriptor_status;
+	/**  Use Tx mbufs for Rx to rearm */
+	eth_rx_direct_rearm_t rx_direct_rearm;
 
 	/**
 	 * Device data that is shared between primary and secondary processes
@@ -486,6 +488,11 @@  typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
 typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
 				    uint16_t rx_queue_id);
 
+/**< @internal Get Tx information of a transmit queue of an Ethernet device. */
+typedef void (*eth_txq_data_get_t)(struct rte_eth_dev *dev,
+				      uint16_t tx_queue_id,
+				      struct rte_eth_txq_data *txq_data);
+
 /** @internal Release memory resources allocated by given Rx/Tx queue. */
 typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
 				    uint16_t queue_id);
@@ -1138,6 +1145,8 @@  struct eth_dev_ops {
 	eth_rxq_info_get_t         rxq_info_get;
 	/** Retrieve Tx queue information */
 	eth_txq_info_get_t         txq_info_get;
+	/** Get the address where Tx data is stored */
+	eth_txq_data_get_t         txq_data_get;
 	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst mode */
 	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst mode */
 	eth_fw_version_get_t       fw_version_get; /**< Get firmware version */
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 48090c879a..bfe16c7d77 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -276,6 +276,7 @@  eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
 	fpo->rx_queue_count = dev->rx_queue_count;
 	fpo->rx_descriptor_status = dev->rx_descriptor_status;
 	fpo->tx_descriptor_status = dev->tx_descriptor_status;
+	fpo->rx_direct_rearm = dev->rx_direct_rearm;
 
 	fpo->rxq.data = dev->data->rx_queues;
 	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 0c2c1088c0..0dccec2e4b 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1648,6 +1648,43 @@  rte_eth_dev_is_removed(uint16_t port_id)
 	return ret;
 }
 
+int
+rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
+			struct rte_eth_txq_data *txq_data)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u\n", queue_id);
+		return -EINVAL;
+	}
+
+	if (txq_data == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Cannot get ethdev port %u Tx queue %u data to NULL\n",
+			port_id, queue_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->tx_queues == NULL ||
+			dev->data->tx_queues[queue_id] == NULL) {
+		RTE_ETHDEV_LOG(ERR,
+			   "Tx queue %"PRIu16" of device with port_id=%"
+			   PRIu16" has not been setup\n",
+			   queue_id, port_id);
+		return -EINVAL;
+	}
+
+	if (*dev->dev_ops->txq_data_get == NULL)
+		return -ENOTSUP;
+
+	dev->dev_ops->txq_data_get(dev, queue_id, txq_data);
+
+	return 0;
+}
+
 static int
 rte_eth_rx_queue_check_split(const struct rte_eth_rxseg_split *rx_seg,
 			     uint16_t n_seg, uint32_t *mbp_buf_size,
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 2e783536c1..daf7f05d62 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1949,6 +1949,23 @@  struct rte_eth_txq_info {
 	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
 } __rte_cache_min_aligned;
 
+/**
+ * @internal
+ * Structure used to hold pointers to internal ethdev Tx data.
+ * The main purpose is to load and store Tx queue data in direct rearm mode.
+ */
+struct rte_eth_txq_data {
+	uint64_t *offloads;
+	void *tx_sw_ring;
+	volatile void *tx_ring;
+	uint16_t *tx_next_dd;
+	uint16_t *nb_tx_free;
+	uint16_t nb_tx_desc;
+	uint16_t tx_rs_thresh;
+	uint16_t tx_free_thresh;
+} __rte_cache_min_aligned;
+
+
 /* Generic Burst mode flag definition, values can be ORed. */
 
 /**
@@ -4718,6 +4735,27 @@  int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
 int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
 		const struct rte_eth_rxtx_callback *user_cb);
 
+/**
+ * Get the address which Tx data is stored.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The Tx queue on the Ethernet device for which information
+ *   will be retrieved.
+ * @param txq_data
+ *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENODEV:  If *port_id* is invalid.
+ *   - -ENOTSUP: routine is not supported by the device PMD.
+ *   - -EINVAL:  The queue_id is out of range.
+ */
+__rte_experimental
+int rte_eth_tx_queue_data_get(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_txq_data *txq_data);
+
 /**
  * Retrieve information about given port's Rx queue.
  *
@@ -6209,6 +6247,63 @@  rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
 	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
 }
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Put Tx buffers into Rx sw-ring and rearm descs.
+ *
+ * @param port_id
+ *   Port identifying the receive side.
+ * @param queue_id
+ *   The index of the transmit queue identifying the receive side.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param txq_data
+ *   A pointer to a structure of type *rte_eth_txq_data* to be filled.
+ * @return
+ *   The number of direct-rearmed buffers.
+ */
+__rte_experimental
+static __rte_always_inline uint16_t
+rte_eth_rx_direct_rearm(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_txq_data *txq_data)
+{
+	uint16_t nb_rearm;
+	struct rte_eth_fp_ops *p;
+	void *qd;
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	if (port_id >= RTE_MAX_ETHPORTS ||
+			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+		RTE_ETHDEV_LOG(ERR,
+			"Invalid port_id=%u or queue_id=%u\n",
+			port_id, queue_id);
+		return 0;
+	}
+#endif
+
+	p = &rte_eth_fp_ops[port_id];
+	qd = p->rxq.data[queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+
+	if (qd == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
+			queue_id, port_id);
+		return 0;
+	}
+
+	if (!p->rx_direct_rearm)
+		return -ENOTSUP;
+#endif
+
+	nb_rearm = p->rx_direct_rearm(qd, txq_data);
+
+	return nb_rearm;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index dcf8adab92..6a328e2481 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -56,6 +56,9 @@  typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
 /** @internal Check the status of a Tx descriptor */
 typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
 
+/** @internal Put Tx buffers into Rx sw-ring and rearm descs */
+typedef uint16_t (*eth_rx_direct_rearm_t)(void *rxq, struct rte_eth_txq_data *txq_data);
+
 /**
  * @internal
  * Structure used to hold opaque pointers to internal ethdev Rx/Tx
@@ -90,6 +93,8 @@  struct rte_eth_fp_ops {
 	eth_rx_queue_count_t rx_queue_count;
 	/** Check the status of a Rx descriptor. */
 	eth_rx_descriptor_status_t rx_descriptor_status;
+	/** Put Tx buffers into Rx sw-ring and rearm descs */
+	eth_rx_direct_rearm_t rx_direct_rearm;
 	/** Rx queues data. */
 	struct rte_ethdev_qdata rxq;
 	uintptr_t reserved1[3];
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 03f52fee91..69e4c376d3 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -285,6 +285,10 @@  EXPERIMENTAL {
 	rte_mtr_color_in_protocol_priority_get;
 	rte_mtr_color_in_protocol_set;
 	rte_mtr_meter_vlan_table_update;
+
+	# added in 22.11
+	rte_eth_tx_queue_data_get;
+	rte_eth_rx_direct_rearm;
 };
 
 INTERNAL {