[v3,01/14] ethdev: add support for hairpin queue

Message ID 1571130263-120863-2-git-send-email-orika@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series add hairpin feature |

Checks

Context Check Description
ci/iol-compilation success Compile Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Ori Kam Oct. 15, 2019, 9:04 a.m. UTC
  This commit introduce hairpin queue type.

The hairpin queue in build from Rx queue binded to Tx queue.
It is used to offload traffic coming from the wire and redirect it back
to the wire.

There are 3 new functions:
- rte_eth_dev_hairpin_capability_get
- rte_eth_rx_hairpin_queue_setup
- rte_eth_tx_hairpin_queue_setup

In order to use the queue, there is a need to create rte_flow
with queue / RSS action that targets one or more of the Rx queues.

Signed-off-by: Ori Kam <orika@mellanox.com>

---
V3:
 - update according to ML comments.

V2:
 - update according to ML comments.

---
 lib/librte_ethdev/rte_ethdev.c           | 260 ++++++++++++++++++++++++++++++-
 lib/librte_ethdev/rte_ethdev.h           | 143 ++++++++++++++++-
 lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
 lib/librte_ethdev/rte_ethdev_version.map |   5 +
 4 files changed, 422 insertions(+), 13 deletions(-)
  

Comments

Andrew Rybchenko Oct. 15, 2019, 10:12 a.m. UTC | #1
Hi Ori,

On 10/15/19 12:04 PM, Ori Kam wrote:
> This commit introduce hairpin queue type.
>
> The hairpin queue in build from Rx queue binded to Tx queue.
> It is used to offload traffic coming from the wire and redirect it back
> to the wire.
>
> There are 3 new functions:
> - rte_eth_dev_hairpin_capability_get
> - rte_eth_rx_hairpin_queue_setup
> - rte_eth_tx_hairpin_queue_setup
>
> In order to use the queue, there is a need to create rte_flow
> with queue / RSS action that targets one or more of the Rx queues.
>
> Signed-off-by: Ori Kam <orika@mellanox.com>
>
> ---
> V3:
>   - update according to ML comments.
>
> V2:
>   - update according to ML comments.
>
> ---
>   lib/librte_ethdev/rte_ethdev.c           | 260 ++++++++++++++++++++++++++++++-
>   lib/librte_ethdev/rte_ethdev.h           | 143 ++++++++++++++++-
>   lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
>   lib/librte_ethdev/rte_ethdev_version.map |   5 +
>   4 files changed, 422 insertions(+), 13 deletions(-)
>
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index af82360..22a97de 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -904,10 +904,19 @@ struct rte_eth_dev *
>   
>   	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
>   
> -	if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
> -		RTE_ETHDEV_LOG(INFO,
> -			"Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
> +	if (dev->data->rx_queue_state[rx_queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> +		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
> +			"port_id=%"PRIu16" is hairpin queue\n",
>   			rx_queue_id, port_id);
> +		return -EINVAL;
> +	}
> +
> +	if (dev->data->rx_queue_state[rx_queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_STARTED) {
> +		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
> +			"port_id=%"PRIu16" already started\n", rx_queue_id,
> +			port_id);
>   		return 0;
>   	}
>   

You should not touch existing code here. Yes, line is longer than 80 
symbols,
but fixing codding style is a separate thing.

Also format string should not be split into many lines since it
makes it hard to use grep to find it in sources (i.e. when you
see it in longs and would like to find corresponding line in
sources).

[snip]

> @@ -1758,6 +1791,92 @@ struct rte_eth_dev *
>   }
>   
>   int
> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> +			       uint16_t nb_rx_desc,
> +			       const struct rte_eth_hairpin_conf *conf)
> +{
> +	int ret;
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_hairpin_cap cap;
> +	struct rte_eth_dev_info dev_info;
> +	void **rxq;
> +	int i;
> +	int count = 0;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
> +		return -EINVAL;
> +	}
> +	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
> +	if (ret != 0)
> +		return ret;
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);

There is not necessity to check the pointer here, since it is checked
inside rte_eth_dev_info_get() and error is returned.

> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
> +				-ENOTSUP);
> +	/* Use default specified by driver, if nb_rx_desc is zero */
> +	if (nb_rx_desc == 0)
> +		nb_rx_desc = cap.max_nb_desc;
> +	if (nb_rx_desc > cap.max_nb_desc) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
> +		return -EINVAL;
> +	}
> +	ret = rte_eth_dev_info_get(port_id, &dev_info);
> +	if (ret != 0)
> +		return ret;
> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> +		return -EBUSY;
> +	if (dev->data->dev_started &&
> +		(dev->data->rx_queue_state[rx_queue_id] !=
> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;

As I understand it does not allow to change hairpin queue setup
by calling setup once agian.

> +	if (conf->peer_n > cap.max_rx_2_tx) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: <= %hu", conf->peer_n,
> +			       cap.max_rx_2_tx);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: > 0", conf->peer_n);
> +		return -EINVAL;
> +	}
> +	if (cap.max_n_queues != UINT16_MAX) {
> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +			if (dev->data->rx_queue_state[i] ==
> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +				count++;
> +		}
> +		if (count > cap.max_n_queues) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "To many Rx hairpin queues %d", count);
> +			return -EINVAL;
> +		}
> +	}
> +	rxq = dev->data->rx_queues;
> +	if (rxq[rx_queue_id] != NULL) {
> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> +		rxq[rx_queue_id] = NULL;
> +	}
> +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> +						      nb_rx_desc, conf);
> +	if (ret == 0)
> +		dev->data->rx_queue_state[rx_queue_id] =
> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> +	return eth_err(port_id, ret);
> +}
> +
> +int
>   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		       uint16_t nb_tx_desc, unsigned int socket_id,
>   		       const struct rte_eth_txconf *tx_conf)
> @@ -1851,9 +1970,92 @@ struct rte_eth_dev *
>   			__func__);
>   		return -EINVAL;
>   	}
> +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
> +					      socket_id, &local_conf);
> +	return eth_err(port_id, ret);
> +}

Unrelated change

>   
> -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
> -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
> +int
> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> +			       uint16_t nb_tx_desc,
> +			       const struct rte_eth_hairpin_conf *conf)
> +{
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_hairpin_cap cap;
> +	struct rte_eth_dev_info dev_info;
> +	void **txq;
> +	int i;
> +	int count = 0;
> +	int ret;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +	dev = &rte_eth_devices[port_id];
> +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
> +		return -EINVAL;
> +	}
> +	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
> +	if (ret != 0)
> +		return ret;
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);

There is not necessity to check the pointer here, since it is checked
inside rte_eth_dev_info_get() and error is returned.

> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
> +				-ENOTSUP);
> +	rte_eth_dev_info_get(port_id, &dev_info);


Please, check return status.

> +	/* Use default specified by driver, if nb_tx_desc is zero */
> +	if (nb_tx_desc == 0)
> +		nb_tx_desc = cap.max_nb_desc;
> +	if (nb_tx_desc > cap.max_nb_desc) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_tx_desc(=%hu), should be: "
> +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n > cap.max_tx_2_rx) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: <= %hu", conf->peer_n,
> +			       cap.max_tx_2_rx);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: > 0", conf->peer_n);
> +		return -EINVAL;
> +	}
> +	if (cap.max_n_queues != UINT16_MAX) {
> +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +			if (dev->data->tx_queue_state[i] ==
> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +				count++;
> +		}
> +		if (count > cap.max_n_queues) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "To many Rx hairpin queues %d", count);
> +			return -EINVAL;
> +		}
> +	}

I don't understand why order of checks differ above and here.

> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> +		return -EBUSY;
> +	if (dev->data->dev_started &&
> +		(dev->data->tx_queue_state[tx_queue_id] !=
> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;

As I understand it does not allow to change hairpin queue setup
by calling setup once agian.

> +	txq = dev->data->tx_queues;
> +	if (txq[tx_queue_id] != NULL) {
> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> +		txq[tx_queue_id] = NULL;
> +	}
> +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
> +		(dev, tx_queue_id, nb_tx_desc, conf);
> +	if (ret == 0)
> +		dev->data->tx_queue_state[tx_queue_id] =
> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> +	return eth_err(port_id, ret);
>   }
>   
>   void

[snip]

> diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> index d937fb4..51843c1 100644
> --- a/lib/librte_ethdev/rte_ethdev.h
> +++ b/lib/librte_ethdev/rte_ethdev.h

[snip]

> @@ -1277,6 +1317,7 @@ struct rte_eth_dcb_info {
>    */
>   #define RTE_ETH_QUEUE_STATE_STOPPED 0
>   #define RTE_ETH_QUEUE_STATE_STARTED 1
> +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2

See my notes below.
Also, may be out of scope of the review, but
I'd move these defines out of public header to rte_ethdev_driver.h
in a separate patch.

>   #define RTE_ETH_ALL RTE_MAX_ETHPORTS
>   
> @@ -1771,6 +1812,36 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   		struct rte_mempool *mb_pool);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a hairpin receive queue for an Ethernet device.
> + *
> + * The function set up the selected queue to be used in hairpin.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param rx_queue_id
> + *   The index of the receive queue to set up.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param nb_rx_desc
> + *   The number of receive descriptors to allocate for the receive ring.
> + *   0 means the PMD will use default value.
> + * @param conf
> + *   The pointer to the hairpin configuration.

There is empty line between parameters and return description below,
but it is missing here. It should be the same in both places and I'd
prefer to have empty line to make it easier to read.

> + * @return
> + *   - (0) if successful.
> + *   - (-ENOTSUP) if hardware doesn't support.
> + *   - (-EINVAL) if bad parameter.
> + *   - (-ENOMEM) if unable to allocate the resources.
> + */
> +__rte_experimental
> +int rte_eth_rx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +
> +/**
>    * Allocate and set up a transmit queue for an Ethernet device.
>    *
>    * @param port_id
> @@ -1823,6 +1894,35 @@ int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		const struct rte_eth_txconf *tx_conf);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a transmit hairpin queue for an Ethernet device.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param tx_queue_id
> + *   The index of the transmit queue to set up.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param nb_tx_desc
> + *   The number of transmit descriptors to allocate for the transmit ring.
> + *   0 to set default PMD value.
> + * @param conf
> + *   The hairpin configuration.
> + *
> + * @return
> + *   - (0) if successful.
> + *   - (-ENOTSUP) if hardware doesn't support.
> + *   - (-EINVAL) if bad parameter.
> + *   - (-ENOMEM) if unable to allocate the resources.
> + */
> +__rte_experimental
> +int rte_eth_tx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +
> +/**
>    * Return the NUMA socket to which an Ethernet device is connected
>    *
>    * @param port_id

[snip]

> diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
> index dcb5ae6..ef46e71 100644
> --- a/lib/librte_ethdev/rte_ethdev_core.h
> +++ b/lib/librte_ethdev/rte_ethdev_core.h
> @@ -250,6 +250,12 @@ typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    struct rte_mempool *mb_pool);
>   /**< @internal Set up a receive queue of an Ethernet device. */
>   
> +typedef int (*eth_rx_hairpin_queue_setup_t)
> +	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +	 uint16_t nb_rx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +/**< @internal Set up a receive hairpin queue of an Ethernet device. */
> +

Please, write down full description similar to eth_promiscuous_enable_t
before the typedef. Don't forgot about return values listing.

>   typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    uint16_t tx_queue_id,
>   				    uint16_t nb_tx_desc,
> @@ -257,6 +263,12 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    const struct rte_eth_txconf *tx_conf);
>   /**< @internal Setup a transmit queue of an Ethernet device. */
>   
> +typedef int (*eth_tx_hairpin_queue_setup_t)
> +	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> +	 uint16_t nb_tx_desc,
> +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> +/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
> +


Please, write down full description similar to eth_promiscuous_enable_t
before the typedef. Don't forgot about return values listing.

>   typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>   				    uint16_t rx_queue_id);
>   /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> @@ -505,6 +517,10 @@ typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
>   						const char *pool);
>   /**< @internal Test if a port supports specific mempool ops */
>   
> +typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
> +				     struct rte_eth_hairpin_cap *cap);
> +/**< @internal get the hairpin capabilities. */
> +

Please, write down full description similar to eth_promiscuous_enable_t
before the typedef. Don't forgot about return values listing.

If you reorder functions as suggested below, hairpin queue setup
typedefs should be defiend here.

>   /**
>    * @internal A structure containing the functions exported by an Ethernet driver.
>    */
> @@ -557,6 +573,8 @@ struct eth_dev_ops {
>   	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
>   	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
>   	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue. */
> +	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
> +	/**< Set up device RX hairpin queue. */
>   	eth_queue_release_t        rx_queue_release; /**< Release RX queue. */
>   	eth_rx_queue_count_t       rx_queue_count;
>   	/**< Get the number of used RX descriptors. */
> @@ -568,6 +586,8 @@ struct eth_dev_ops {
>   	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue interrupt. */
>   	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt. */
>   	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue. */
> +	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
> +	/**< Set up device TX hairpin queue. */
>   	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
>   	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
>   
> @@ -639,6 +659,9 @@ struct eth_dev_ops {
>   
>   	eth_pool_ops_supported_t pool_ops_supported;
>   	/**< Test if a port supports specific mempool ops */
> +
> +	eth_hairpin_cap_get_t hairpin_cap_get;
> +	/**< Returns the hairpin capabilities. */

May I suggest to put hairpin queue setup functions here.
It will group hairpin related functions here.

>   };
>   
>   /**
> @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
>   		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
>   		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
>   	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> -			/**< Queues state: STARTED(1) / STOPPED(0). */
> +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
>   	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> -			/**< Queues state: STARTED(1) / STOPPED(0). */
> +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */

/ is missing above after HAIRPIN(2).
In fact there is no point to duplicate values in parenthesis, but it is
out of scope of the review.

I'm not 100% happy that it makes impossible to mark hairpin queues
as started/stopped. It is not that important right now, but may be it is
better to use state as bit field. Bit 0 - stopped/started,
bit 1 - regular/hairpin. Anyway, it is internal interface.

[snip]
  
Ori Kam Oct. 16, 2019, 7:36 p.m. UTC | #2
Hi Andrew,

Thanks again for your time.

PSB, 
Ori


> -----Original Message-----
> From: Andrew Rybchenko <arybchenko@solarflare.com>
> Sent: Tuesday, October 15, 2019 1:12 PM
> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
> Subject: Re: [PATCH v3 01/14] ethdev: add support for hairpin queue
> 
> Hi Ori,
> 
> On 10/15/19 12:04 PM, Ori Kam wrote:
> > This commit introduce hairpin queue type.
> >
> > The hairpin queue in build from Rx queue binded to Tx queue.
> > It is used to offload traffic coming from the wire and redirect it back
> > to the wire.
> >
> > There are 3 new functions:
> > - rte_eth_dev_hairpin_capability_get
> > - rte_eth_rx_hairpin_queue_setup
> > - rte_eth_tx_hairpin_queue_setup
> >
> > In order to use the queue, there is a need to create rte_flow
> > with queue / RSS action that targets one or more of the Rx queues.
> >
> > Signed-off-by: Ori Kam <orika@mellanox.com>
> >
> > ---
> > V3:
> >   - update according to ML comments.
> >
> > V2:
> >   - update according to ML comments.
> >
> > ---
> >   lib/librte_ethdev/rte_ethdev.c           | 260
> ++++++++++++++++++++++++++++++-
> >   lib/librte_ethdev/rte_ethdev.h           | 143 ++++++++++++++++-
> >   lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
> >   lib/librte_ethdev/rte_ethdev_version.map |   5 +
> >   4 files changed, 422 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> > index af82360..22a97de 100644
> > --- a/lib/librte_ethdev/rte_ethdev.c
> > +++ b/lib/librte_ethdev/rte_ethdev.c
> > @@ -904,10 +904,19 @@ struct rte_eth_dev *
> >
> >   	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -
> ENOTSUP);
> >
> > -	if (dev->data->rx_queue_state[rx_queue_id] !=
> RTE_ETH_QUEUE_STATE_STOPPED) {
> > -		RTE_ETHDEV_LOG(INFO,
> > -			"Queue %"PRIu16" of device with port_id=%"PRIu16"
> already started\n",
> > +	if (dev->data->rx_queue_state[rx_queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> > +		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
> > +			"port_id=%"PRIu16" is hairpin queue\n",
> >   			rx_queue_id, port_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (dev->data->rx_queue_state[rx_queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_STARTED) {
> > +		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
> > +			"port_id=%"PRIu16" already started\n", rx_queue_id,
> > +			port_id);
> >   		return 0;
> >   	}
> >
> 
> You should not touch existing code here. Yes, line is longer than 80
> symbols,
> but fixing codding style is a separate thing.
> 

The code style was done since I needed to change the if statement from
original one that was != stopped to == started. But If you prefer I can undo this change.

> Also format string should not be split into many lines since it
> makes it hard to use grep to find it in sources (i.e. when you
> see it in longs and would like to find corresponding line in
> sources).
> 

O.K by me, I'm used to mlx5 where line length is the more important thing.

> [snip]
> 
> > @@ -1758,6 +1791,92 @@ struct rte_eth_dev *
> >   }
> >
> >   int
> > +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> > +			       uint16_t nb_rx_desc,
> > +			       const struct rte_eth_hairpin_conf *conf)
> > +{
> > +	int ret;
> > +	struct rte_eth_dev *dev;
> > +	struct rte_eth_hairpin_cap cap;
> > +	struct rte_eth_dev_info dev_info;
> > +	void **rxq;
> > +	int i;
> > +	int count = 0;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> > +
> > +	dev = &rte_eth_devices[port_id];
> > +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
> rx_queue_id);
> > +		return -EINVAL;
> > +	}
> > +	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
> > +	if (ret != 0)
> > +		return ret;
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> ENOTSUP);
> 
> There is not necessity to check the pointer here, since it is checked
> inside rte_eth_dev_info_get() and error is returned.
> 

O.K.

> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >rx_hairpin_queue_setup,
> > +				-ENOTSUP);
> > +	/* Use default specified by driver, if nb_rx_desc is zero */
> > +	if (nb_rx_desc == 0)
> > +		nb_rx_desc = cap.max_nb_desc;
> > +	if (nb_rx_desc > cap.max_nb_desc) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> > +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
> > +		return -EINVAL;
> > +	}
> > +	ret = rte_eth_dev_info_get(port_id, &dev_info);
> > +	if (ret != 0)
> > +		return ret;
> > +	if (dev->data->dev_started &&
> > +		!(dev_info.dev_capa &
> > +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> > +		return -EBUSY;
> > +	if (dev->data->dev_started &&
> > +		(dev->data->rx_queue_state[rx_queue_id] !=
> > +		 RTE_ETH_QUEUE_STATE_STOPPED))
> > +		return -EBUSY;
> 
> As I understand it does not allow to change hairpin queue setup
> by calling setup once agian.
> 

I will replace both above if cases to return error if dev is started. Since we can't support 
hairpin change when device is working.

> > +	if (conf->peer_n > cap.max_rx_2_tx) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: <= %hu", conf->peer_n,
> > +			       cap.max_rx_2_tx);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n == 0) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: > 0", conf->peer_n);
> > +		return -EINVAL;
> > +	}
> > +	if (cap.max_n_queues != UINT16_MAX) {
> > +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +			if (dev->data->rx_queue_state[i] ==
> > +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +				count++;
> > +		}
> > +		if (count > cap.max_n_queues) {
> > +			RTE_ETHDEV_LOG(ERR,
> > +				       "To many Rx hairpin queues %d", count);
> > +			return -EINVAL;
> > +		}
> > +	}
> > +	rxq = dev->data->rx_queues;
> > +	if (rxq[rx_queue_id] != NULL) {
> > +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >rx_queue_release,
> > +					-ENOTSUP);
> > +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> > +		rxq[rx_queue_id] = NULL;
> > +	}
> > +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> > +						      nb_rx_desc, conf);
> > +	if (ret == 0)
> > +		dev->data->rx_queue_state[rx_queue_id] =
> > +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> > +	return eth_err(port_id, ret);
> > +}
> > +
> > +int
> >   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> >   		       uint16_t nb_tx_desc, unsigned int socket_id,
> >   		       const struct rte_eth_txconf *tx_conf)
> > @@ -1851,9 +1970,92 @@ struct rte_eth_dev *
> >   			__func__);
> >   		return -EINVAL;
> >   	}
> > +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
> > +					      socket_id, &local_conf);
> > +	return eth_err(port_id, ret);
> > +}
> 
> Unrelated change
> 

O.K. will remove.

> >
> > -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
> > -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
> > +int
> > +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> > +			       uint16_t nb_tx_desc,
> > +			       const struct rte_eth_hairpin_conf *conf)
> > +{
> > +	struct rte_eth_dev *dev;
> > +	struct rte_eth_hairpin_cap cap;
> > +	struct rte_eth_dev_info dev_info;
> > +	void **txq;
> > +	int i;
> > +	int count = 0;
> > +	int ret;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> > +	dev = &rte_eth_devices[port_id];
> > +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
> tx_queue_id);
> > +		return -EINVAL;
> > +	}
> > +	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
> > +	if (ret != 0)
> > +		return ret;
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> ENOTSUP);
> 
> There is not necessity to check the pointer here, since it is checked
> inside rte_eth_dev_info_get() and error is returned.
> 

O.K. will remove function call.

> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >tx_hairpin_queue_setup,
> > +				-ENOTSUP);
> > +	rte_eth_dev_info_get(port_id, &dev_info);
> 
> 
> Please, check return status.
> 

Sure, will add check.

> > +	/* Use default specified by driver, if nb_tx_desc is zero */
> > +	if (nb_tx_desc == 0)
> > +		nb_tx_desc = cap.max_nb_desc;
> > +	if (nb_tx_desc > cap.max_nb_desc) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for nb_tx_desc(=%hu), should be: "
> > +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n > cap.max_tx_2_rx) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: <= %hu", conf->peer_n,
> > +			       cap.max_tx_2_rx);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n == 0) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: > 0", conf->peer_n);
> > +		return -EINVAL;
> > +	}
> > +	if (cap.max_n_queues != UINT16_MAX) {
> > +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +			if (dev->data->tx_queue_state[i] ==
> > +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +				count++;
> > +		}
> > +		if (count > cap.max_n_queues) {
> > +			RTE_ETHDEV_LOG(ERR,
> > +				       "To many Rx hairpin queues %d", count);
> > +			return -EINVAL;
> > +		}
> > +	}
> 
> I don't understand why order of checks differ above and here.
> 

Will align the order.

> > +	if (dev->data->dev_started &&
> > +		!(dev_info.dev_capa &
> > +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> > +		return -EBUSY;
> > +	if (dev->data->dev_started &&
> > +		(dev->data->tx_queue_state[tx_queue_id] !=
> > +		 RTE_ETH_QUEUE_STATE_STOPPED))
> > +		return -EBUSY;
> 
> As I understand it does not allow to change hairpin queue setup
> by calling setup once agian.
> 

Like above I will check only if device is started, since we can't update hairpin queue
while device is started.

> > +	txq = dev->data->tx_queues;
> > +	if (txq[tx_queue_id] != NULL) {
> > +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >tx_queue_release,
> > +					-ENOTSUP);
> > +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> > +		txq[tx_queue_id] = NULL;
> > +	}
> > +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
> > +		(dev, tx_queue_id, nb_tx_desc, conf);
> > +	if (ret == 0)
> > +		dev->data->tx_queue_state[tx_queue_id] =
> > +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> > +	return eth_err(port_id, ret);
> >   }
> >
> >   void
> 
> [snip]
> 
> > diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> > index d937fb4..51843c1 100644
> > --- a/lib/librte_ethdev/rte_ethdev.h
> > +++ b/lib/librte_ethdev/rte_ethdev.h
> 
> [snip]
> 
> > @@ -1277,6 +1317,7 @@ struct rte_eth_dcb_info {
> >    */
> >   #define RTE_ETH_QUEUE_STATE_STOPPED 0
> >   #define RTE_ETH_QUEUE_STATE_STARTED 1
> > +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
> 
> See my notes below.
> Also, may be out of scope of the review, but
> I'd move these defines out of public header to rte_ethdev_driver.h
> in a separate patch.
>

O.K. I will add a commit to move them.
 
> >   #define RTE_ETH_ALL RTE_MAX_ETHPORTS
> >
> > @@ -1771,6 +1812,36 @@ int rte_eth_rx_queue_setup(uint16_t port_id,
> uint16_t rx_queue_id,
> >   		struct rte_mempool *mb_pool);
> >
> >   /**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * Allocate and set up a hairpin receive queue for an Ethernet device.
> > + *
> > + * The function set up the selected queue to be used in hairpin.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param rx_queue_id
> > + *   The index of the receive queue to set up.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @param nb_rx_desc
> > + *   The number of receive descriptors to allocate for the receive ring.
> > + *   0 means the PMD will use default value.
> > + * @param conf
> > + *   The pointer to the hairpin configuration.
> 
> There is empty line between parameters and return description below,
> but it is missing here. It should be the same in both places and I'd
> prefer to have empty line to make it easier to read.
> 

This is the how the rte_eth_dev_info_get looks like. Just following your request 😊
I think the empty line is better so I will add it.

> > + * @return
> > + *   - (0) if successful.
> > + *   - (-ENOTSUP) if hardware doesn't support.
> > + *   - (-EINVAL) if bad parameter.
> > + *   - (-ENOMEM) if unable to allocate the resources.
> > + */
> > +__rte_experimental
> > +int rte_eth_rx_hairpin_queue_setup
> > +	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +
> > +/**
> >    * Allocate and set up a transmit queue for an Ethernet device.
> >    *
> >    * @param port_id
> > @@ -1823,6 +1894,35 @@ int rte_eth_tx_queue_setup(uint16_t port_id,
> uint16_t tx_queue_id,
> >   		const struct rte_eth_txconf *tx_conf);
> >
> >   /**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * Allocate and set up a transmit hairpin queue for an Ethernet device.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param tx_queue_id
> > + *   The index of the transmit queue to set up.
> > + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @param nb_tx_desc
> > + *   The number of transmit descriptors to allocate for the transmit ring.
> > + *   0 to set default PMD value.
> > + * @param conf
> > + *   The hairpin configuration.
> > + *
> > + * @return
> > + *   - (0) if successful.
> > + *   - (-ENOTSUP) if hardware doesn't support.
> > + *   - (-EINVAL) if bad parameter.
> > + *   - (-ENOMEM) if unable to allocate the resources.
> > + */
> > +__rte_experimental
> > +int rte_eth_tx_hairpin_queue_setup
> > +	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +
> > +/**
> >    * Return the NUMA socket to which an Ethernet device is connected
> >    *
> >    * @param port_id
> 
> [snip]
> 
> > diff --git a/lib/librte_ethdev/rte_ethdev_core.h
> b/lib/librte_ethdev/rte_ethdev_core.h
> > index dcb5ae6..ef46e71 100644
> > --- a/lib/librte_ethdev/rte_ethdev_core.h
> > +++ b/lib/librte_ethdev/rte_ethdev_core.h
> > @@ -250,6 +250,12 @@ typedef int (*eth_rx_queue_setup_t)(struct
> rte_eth_dev *dev,
> >   				    struct rte_mempool *mb_pool);
> >   /**< @internal Set up a receive queue of an Ethernet device. */
> >
> > +typedef int (*eth_rx_hairpin_queue_setup_t)
> > +	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> > +	 uint16_t nb_rx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +/**< @internal Set up a receive hairpin queue of an Ethernet device. */
> > +
> 
> Please, write down full description similar to eth_promiscuous_enable_t
> before the typedef. Don't forgot about return values listing.
> 

O.K. will add description.

> >   typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
> >   				    uint16_t tx_queue_id,
> >   				    uint16_t nb_tx_desc,
> > @@ -257,6 +263,12 @@ typedef int (*eth_tx_queue_setup_t)(struct
> rte_eth_dev *dev,
> >   				    const struct rte_eth_txconf *tx_conf);
> >   /**< @internal Setup a transmit queue of an Ethernet device. */
> >
> > +typedef int (*eth_tx_hairpin_queue_setup_t)
> > +	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> > +	 uint16_t nb_tx_desc,
> > +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> > +/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
> > +
> 
> 
> Please, write down full description similar to eth_promiscuous_enable_t
> before the typedef. Don't forgot about return values listing.
> 
O.K. will add description.

> >   typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >   				    uint16_t rx_queue_id);
> >   /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> > @@ -505,6 +517,10 @@ typedef int (*eth_pool_ops_supported_t)(struct
> rte_eth_dev *dev,
> >   						const char *pool);
> >   /**< @internal Test if a port supports specific mempool ops */
> >
> > +typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
> > +				     struct rte_eth_hairpin_cap *cap);
> > +/**< @internal get the hairpin capabilities. */
> > +
> 
> Please, write down full description similar to eth_promiscuous_enable_t
> before the typedef. Don't forgot about return values listing.
>

O.K. will add description.

 
> If you reorder functions as suggested below, hairpin queue setup
> typedefs should be defiend here.
> 
> >   /**
> >    * @internal A structure containing the functions exported by an Ethernet
> driver.
> >    */
> > @@ -557,6 +573,8 @@ struct eth_dev_ops {
> >   	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
> >   	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
> >   	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX
> queue. */
> > +	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
> > +	/**< Set up device RX hairpin queue. */
> >   	eth_queue_release_t        rx_queue_release; /**< Release RX queue.
> */
> >   	eth_rx_queue_count_t       rx_queue_count;
> >   	/**< Get the number of used RX descriptors. */
> > @@ -568,6 +586,8 @@ struct eth_dev_ops {
> >   	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue
> interrupt. */
> >   	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue
> interrupt. */
> >   	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX
> queue. */
> > +	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
> > +	/**< Set up device TX hairpin queue. */
> >   	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
> >   	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
> >
> > @@ -639,6 +659,9 @@ struct eth_dev_ops {
> >
> >   	eth_pool_ops_supported_t pool_ops_supported;
> >   	/**< Test if a port supports specific mempool ops */
> > +
> > +	eth_hairpin_cap_get_t hairpin_cap_get;
> > +	/**< Returns the hairpin capabilities. */
> 
> May I suggest to put hairpin queue setup functions here.
> It will group hairpin related functions here.
> 

I don't care, both makes sense to me (group the setup function or group the hairpin function)
But I will follow your suggestion.

> >   };
> >
> >   /**
> > @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
> >   		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0).
> */
> >   		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
> >   	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> > -			/**< Queues state: STARTED(1) / STOPPED(0). */
> > +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
> >   	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> > -			/**< Queues state: STARTED(1) / STOPPED(0). */
> > +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
> 
> / is missing above after HAIRPIN(2).
> In fact there is no point to duplicate values in parenthesis, but it is
> out of scope of the review.
> 

I will add the missing /

> I'm not 100% happy that it makes impossible to mark hairpin queues
> as started/stopped. It is not that important right now, but may be it is
> better to use state as bit field. Bit 0 - stopped/started,
> bit 1 - regular/hairpin. Anyway, it is internal interface.
> 

Your idea is very nice, but there are some things to consider.
For example if converted to bits it will take more memory.
We can just it is flags for the U8 but this will mean that we could have 
both hairpin and stopped / started in the same time. Which I'm not sure if 
it is good or bad. Like you say it is internal so let's keep the current implementation,
and discuss your idea after this patch set, and after we will see how the community uses 
the hairpin feature. Is that O.K. for you?

 

> [snip]
  
Andrew Rybchenko Oct. 17, 2019, 10:41 a.m. UTC | #3
Hi Ori,

On 10/16/19 10:36 PM, Ori Kam wrote:
> Hi Andrew,
>
> Thanks again for your time.
>
> PSB,
> Ori
>
>
>> -----Original Message-----
>> From: Andrew Rybchenko <arybchenko@solarflare.com>
>> Sent: Tuesday, October 15, 2019 1:12 PM
>> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
>> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
>> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
>> Subject: Re: [PATCH v3 01/14] ethdev: add support for hairpin queue
>>
>> Hi Ori,
>>
>> On 10/15/19 12:04 PM, Ori Kam wrote:
>>> This commit introduce hairpin queue type.
>>>
>>> The hairpin queue in build from Rx queue binded to Tx queue.
>>> It is used to offload traffic coming from the wire and redirect it back
>>> to the wire.
>>>
>>> There are 3 new functions:
>>> - rte_eth_dev_hairpin_capability_get
>>> - rte_eth_rx_hairpin_queue_setup
>>> - rte_eth_tx_hairpin_queue_setup
>>>
>>> In order to use the queue, there is a need to create rte_flow
>>> with queue / RSS action that targets one or more of the Rx queues.
>>>
>>> Signed-off-by: Ori Kam <orika@mellanox.com>

[snip]

>>> @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
>>>    		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
>>>    		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
>>>    	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
>>> -			/**< Queues state: STARTED(1) / STOPPED(0). */
>>> +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
>>>    	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
>>> -			/**< Queues state: STARTED(1) / STOPPED(0). */
>>> +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
>> / is missing above after HAIRPIN(2).
>> In fact there is no point to duplicate values in parenthesis, but it is
>> out of scope of the review.
>>
> I will add the missing /
>
>> I'm not 100% happy that it makes impossible to mark hairpin queues
>> as started/stopped. It is not that important right now, but may be it is
>> better to use state as bit field. Bit 0 - stopped/started,
>> bit 1 - regular/hairpin. Anyway, it is internal interface.
>>
> Your idea is very nice, but there are some things to consider.
> For example if converted to bits it will take more memory.
> We can just it is flags for the U8 but this will mean that we could have
> both hairpin and stopped / started in the same time. Which I'm not sure if
> it is good or bad. Like you say it is internal so let's keep the current implementation,
> and discuss your idea after this patch set, and after we will see how the community uses
> the hairpin feature. Is that O.K. for you?

Yes.
  

Patch

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index af82360..22a97de 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -904,10 +904,19 @@  struct rte_eth_dev *
 
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
 
-	if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
-		RTE_ETHDEV_LOG(INFO,
-			"Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
+	if (dev->data->rx_queue_state[rx_queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n",
 			rx_queue_id, port_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->rx_queue_state[rx_queue_id] ==
+	    RTE_ETH_QUEUE_STATE_STARTED) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" already started\n", rx_queue_id,
+			port_id);
 		return 0;
 	}
 
@@ -931,6 +940,14 @@  struct rte_eth_dev *
 
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP);
 
+	if (dev->data->rx_queue_state[rx_queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n", rx_queue_id,
+			port_id);
+		return -EINVAL;
+	}
+
 	if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
 		RTE_ETHDEV_LOG(INFO,
 			"Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
@@ -964,6 +981,14 @@  struct rte_eth_dev *
 
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP);
 
+	if (dev->data->tx_queue_state[tx_queue_id] ==
+	   RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n", tx_queue_id,
+			port_id);
+		return -EINVAL;
+	}
+
 	if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
 		RTE_ETHDEV_LOG(INFO,
 			"Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
@@ -989,6 +1014,14 @@  struct rte_eth_dev *
 
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP);
 
+	if (dev->data->tx_queue_state[tx_queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n", tx_queue_id,
+			port_id);
+		return -EINVAL;
+	}
+
 	if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
 		RTE_ETHDEV_LOG(INFO,
 			"Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
@@ -1758,6 +1791,92 @@  struct rte_eth_dev *
 }
 
 int
+rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
+			       uint16_t nb_rx_desc,
+			       const struct rte_eth_hairpin_conf *conf)
+{
+	int ret;
+	struct rte_eth_dev *dev;
+	struct rte_eth_hairpin_cap cap;
+	struct rte_eth_dev_info dev_info;
+	void **rxq;
+	int i;
+	int count = 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
+		return -EINVAL;
+	}
+	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
+	if (ret != 0)
+		return ret;
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
+				-ENOTSUP);
+	/* Use default specified by driver, if nb_rx_desc is zero */
+	if (nb_rx_desc == 0)
+		nb_rx_desc = cap.max_nb_desc;
+	if (nb_rx_desc > cap.max_nb_desc) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for nb_rx_desc(=%hu), should be: "
+			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
+		return -EINVAL;
+	}
+	ret = rte_eth_dev_info_get(port_id, &dev_info);
+	if (ret != 0)
+		return ret;
+	if (dev->data->dev_started &&
+		!(dev_info.dev_capa &
+		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
+		return -EBUSY;
+	if (dev->data->dev_started &&
+		(dev->data->rx_queue_state[rx_queue_id] !=
+		 RTE_ETH_QUEUE_STATE_STOPPED))
+		return -EBUSY;
+	if (conf->peer_n > cap.max_rx_2_tx) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: <= %hu", conf->peer_n,
+			       cap.max_rx_2_tx);
+		return -EINVAL;
+	}
+	if (conf->peer_n == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: > 0", conf->peer_n);
+		return -EINVAL;
+	}
+	if (cap.max_n_queues != UINT16_MAX) {
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			if (dev->data->rx_queue_state[i] ==
+			    RTE_ETH_QUEUE_STATE_HAIRPIN)
+				count++;
+		}
+		if (count > cap.max_n_queues) {
+			RTE_ETHDEV_LOG(ERR,
+				       "To many Rx hairpin queues %d", count);
+			return -EINVAL;
+		}
+	}
+	rxq = dev->data->rx_queues;
+	if (rxq[rx_queue_id] != NULL) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
+					-ENOTSUP);
+		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
+		rxq[rx_queue_id] = NULL;
+	}
+	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
+						      nb_rx_desc, conf);
+	if (ret == 0)
+		dev->data->rx_queue_state[rx_queue_id] =
+			RTE_ETH_QUEUE_STATE_HAIRPIN;
+	return eth_err(port_id, ret);
+}
+
+int
 rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 		       uint16_t nb_tx_desc, unsigned int socket_id,
 		       const struct rte_eth_txconf *tx_conf)
@@ -1851,9 +1970,92 @@  struct rte_eth_dev *
 			__func__);
 		return -EINVAL;
 	}
+	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
+					      socket_id, &local_conf);
+	return eth_err(port_id, ret);
+}
 
-	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
-		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
+int
+rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
+			       uint16_t nb_tx_desc,
+			       const struct rte_eth_hairpin_conf *conf)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_hairpin_cap cap;
+	struct rte_eth_dev_info dev_info;
+	void **txq;
+	int i;
+	int count = 0;
+	int ret;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	dev = &rte_eth_devices[port_id];
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
+		return -EINVAL;
+	}
+	ret = rte_eth_dev_hairpin_capability_get(port_id, &cap);
+	if (ret != 0)
+		return ret;
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
+				-ENOTSUP);
+	rte_eth_dev_info_get(port_id, &dev_info);
+	/* Use default specified by driver, if nb_tx_desc is zero */
+	if (nb_tx_desc == 0)
+		nb_tx_desc = cap.max_nb_desc;
+	if (nb_tx_desc > cap.max_nb_desc) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for nb_tx_desc(=%hu), should be: "
+			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
+		return -EINVAL;
+	}
+	if (conf->peer_n > cap.max_tx_2_rx) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: <= %hu", conf->peer_n,
+			       cap.max_tx_2_rx);
+		return -EINVAL;
+	}
+	if (conf->peer_n == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: > 0", conf->peer_n);
+		return -EINVAL;
+	}
+	if (cap.max_n_queues != UINT16_MAX) {
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			if (dev->data->tx_queue_state[i] ==
+			    RTE_ETH_QUEUE_STATE_HAIRPIN)
+				count++;
+		}
+		if (count > cap.max_n_queues) {
+			RTE_ETHDEV_LOG(ERR,
+				       "To many Rx hairpin queues %d", count);
+			return -EINVAL;
+		}
+	}
+	if (dev->data->dev_started &&
+		!(dev_info.dev_capa &
+		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
+		return -EBUSY;
+	if (dev->data->dev_started &&
+		(dev->data->tx_queue_state[tx_queue_id] !=
+		 RTE_ETH_QUEUE_STATE_STOPPED))
+		return -EBUSY;
+	txq = dev->data->tx_queues;
+	if (txq[tx_queue_id] != NULL) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
+					-ENOTSUP);
+		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
+		txq[tx_queue_id] = NULL;
+	}
+	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
+		(dev, tx_queue_id, nb_tx_desc, conf);
+	if (ret == 0)
+		dev->data->tx_queue_state[tx_queue_id] =
+			RTE_ETH_QUEUE_STATE_HAIRPIN;
+	return eth_err(port_id, ret);
 }
 
 void
@@ -3981,12 +4183,20 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 	rte_errno = ENOTSUP;
 	return NULL;
 #endif
+	struct rte_eth_dev *dev;
+
 	/* check input parameters */
 	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
 		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
 		rte_errno = EINVAL;
 		return NULL;
 	}
+	dev = &rte_eth_devices[port_id];
+	if (dev->data->rx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
 	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
 
 	if (cb == NULL) {
@@ -4058,6 +4268,8 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 	rte_errno = ENOTSUP;
 	return NULL;
 #endif
+	struct rte_eth_dev *dev;
+
 	/* check input parameters */
 	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
 		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
@@ -4065,6 +4277,13 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 		return NULL;
 	}
 
+	dev = &rte_eth_devices[port_id];
+	if (dev->data->tx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
 	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
 
 	if (cb == NULL) {
@@ -4180,6 +4399,14 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP);
 
+	if (dev->data->rx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n", queue_id,
+			port_id);
+		return -EINVAL;
+	}
+
 	memset(qinfo, 0, sizeof(*qinfo));
 	dev->dev_ops->rxq_info_get(dev, queue_id, qinfo);
 	return 0;
@@ -4202,6 +4429,14 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 		return -EINVAL;
 	}
 
+	if (dev->data->tx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(INFO, "Queue %"PRIu16" of device with "
+			"port_id=%"PRIu16" is hairpin queue\n", queue_id,
+			port_id);
+		return -EINVAL;
+	}
+
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP);
 
 	memset(qinfo, 0, sizeof(*qinfo));
@@ -4510,6 +4745,21 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 }
 
 int
+rte_eth_dev_hairpin_capability_get(uint16_t port_id,
+				   struct rte_eth_hairpin_cap *cap)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
+				-ENOTSUP);
+	memset(cap, 0, sizeof(*cap));
+	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)(dev, cap));
+}
+
+int
 rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
 {
 	struct rte_eth_dev *dev;
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index d937fb4..51843c1 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -804,6 +804,46 @@  struct rte_eth_txconf {
 };
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to return the hairpin capabilities that are supported.
+ */
+struct rte_eth_hairpin_cap {
+	uint16_t max_n_queues;
+	/**< The max number of hairpin queues (different bindings). */
+	uint16_t max_rx_2_tx;
+	/**< Max number of Rx queues to be connected to one Tx queue. */
+	uint16_t max_tx_2_rx;
+	/**< Max number of Tx queues to be connected to one Rx queue. */
+	uint16_t max_nb_desc; /**< The max num of descriptors. */
+};
+
+#define RTE_ETH_MAX_HAIRPIN_PEERS 32
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to hold hairpin peer data.
+ */
+struct rte_eth_hairpin_peer {
+	uint16_t port; /**< Peer port. */
+	uint16_t queue; /**< Peer queue. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to configure hairpin binding.
+ */
+struct rte_eth_hairpin_conf {
+	uint16_t peer_n; /**< The number of peers. */
+	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
+};
+
+/**
  * A structure contains information about HW descriptor ring limitations.
  */
 struct rte_eth_desc_lim {
@@ -1277,6 +1317,7 @@  struct rte_eth_dcb_info {
  */
 #define RTE_ETH_QUEUE_STATE_STOPPED 0
 #define RTE_ETH_QUEUE_STATE_STARTED 1
+#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
 
 #define RTE_ETH_ALL RTE_MAX_ETHPORTS
 
@@ -1771,6 +1812,36 @@  int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		struct rte_mempool *mb_pool);
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Allocate and set up a hairpin receive queue for an Ethernet device.
+ *
+ * The function set up the selected queue to be used in hairpin.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue to set up.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_rx_desc
+ *   The number of receive descriptors to allocate for the receive ring.
+ *   0 means the PMD will use default value.
+ * @param conf
+ *   The pointer to the hairpin configuration.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ *   - (-ENOMEM) if unable to allocate the resources.
+ */
+__rte_experimental
+int rte_eth_rx_hairpin_queue_setup
+	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+
+/**
  * Allocate and set up a transmit queue for an Ethernet device.
  *
  * @param port_id
@@ -1823,6 +1894,35 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 		const struct rte_eth_txconf *tx_conf);
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Allocate and set up a transmit hairpin queue for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue to set up.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_tx_desc
+ *   The number of transmit descriptors to allocate for the transmit ring.
+ *   0 to set default PMD value.
+ * @param conf
+ *   The hairpin configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ *   - (-ENOMEM) if unable to allocate the resources.
+ */
+__rte_experimental
+int rte_eth_tx_hairpin_queue_setup
+	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+
+/**
  * Return the NUMA socket to which an Ethernet device is connected
  *
  * @param port_id
@@ -1857,7 +1957,7 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
  *   to rte_eth_dev_configure().
  * @return
  *   - 0: Success, the receive queue is started.
- *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -EINVAL: The port_id or the queue_id out of range or belong to hairpin.
  *   - -EIO: if device is removed.
  *   - -ENOTSUP: The function not supported in PMD driver.
  */
@@ -1874,7 +1974,7 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
  *   to rte_eth_dev_configure().
  * @return
  *   - 0: Success, the receive queue is stopped.
- *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -EINVAL: The port_id or the queue_id out of range or belong to hairpin.
  *   - -EIO: if device is removed.
  *   - -ENOTSUP: The function not supported in PMD driver.
  */
@@ -1892,7 +1992,7 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
  *   to rte_eth_dev_configure().
  * @return
  *   - 0: Success, the transmit queue is started.
- *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -EINVAL: The port_id or the queue_id out of range or belong to hairpin.
  *   - -EIO: if device is removed.
  *   - -ENOTSUP: The function not supported in PMD driver.
  */
@@ -1909,7 +2009,7 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
  *   to rte_eth_dev_configure().
  * @return
  *   - 0: Success, the transmit queue is stopped.
- *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -EINVAL: The port_id or the queue_id out of range or belong to hairpin.
  *   - -EIO: if device is removed.
  *   - -ENOTSUP: The function not supported in PMD driver.
  */
@@ -3575,7 +3675,8 @@  int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
  * @return
  *   - 0: Success
  *   - -ENOTSUP: routine is not supported by the device PMD.
- *   - -EINVAL:  The port_id or the queue_id is out of range.
+ *   - -EINVAL:  The port_id or the queue_id is out of range, or the queue
+ *               is hairpin queue.
  */
 int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
@@ -3595,7 +3696,8 @@  int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
  * @return
  *   - 0: Success
  *   - -ENOTSUP: routine is not supported by the device PMD.
- *   - -EINVAL:  The port_id or the queue_id is out of range.
+ *   - -EINVAL:  The port_id or the queue_id is out of range, or the queue
+ *               is hairpin queue.
  */
 int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
@@ -4037,6 +4139,23 @@  int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
 void *
 rte_eth_dev_get_sec_ctx(uint16_t port_id);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Query the device hairpin capabilities.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param cap
+ *   Pointer to a structure that will hold the hairpin capabilities.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ */
+__rte_experimental
+int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
+				       struct rte_eth_hairpin_cap *cap);
 
 #include <rte_ethdev_core.h>
 
@@ -4137,6 +4256,12 @@  int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
 		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
 		return 0;
 	}
+	if (dev->data->rx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(ERR, "RX queue_id=%u is hairpin queue\n",
+			       queue_id);
+		return 0;
+	}
 #endif
 	nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
 				     rx_pkts, nb_pkts);
@@ -4403,6 +4528,12 @@  static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
 		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
 		return 0;
 	}
+	if (dev->data->tx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(ERR, "TX queue_id=%u is hairpin queue\n",
+			       queue_id);
+		return 0;
+	}
 #endif
 
 #ifdef RTE_ETHDEV_RXTX_CALLBACKS
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index dcb5ae6..ef46e71 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -250,6 +250,12 @@  typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
 				    struct rte_mempool *mb_pool);
 /**< @internal Set up a receive queue of an Ethernet device. */
 
+typedef int (*eth_rx_hairpin_queue_setup_t)
+	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+	 uint16_t nb_rx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+/**< @internal Set up a receive hairpin queue of an Ethernet device. */
+
 typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    uint16_t tx_queue_id,
 				    uint16_t nb_tx_desc,
@@ -257,6 +263,12 @@  typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_tx_hairpin_queue_setup_t)
+	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+	 uint16_t nb_tx_desc,
+	 const struct rte_eth_hairpin_conf *hairpin_conf);
+/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
+
 typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
 				    uint16_t rx_queue_id);
 /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
@@ -505,6 +517,10 @@  typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
 						const char *pool);
 /**< @internal Test if a port supports specific mempool ops */
 
+typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
+				     struct rte_eth_hairpin_cap *cap);
+/**< @internal get the hairpin capabilities. */
+
 /**
  * @internal A structure containing the functions exported by an Ethernet driver.
  */
@@ -557,6 +573,8 @@  struct eth_dev_ops {
 	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
 	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
 	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue. */
+	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
+	/**< Set up device RX hairpin queue. */
 	eth_queue_release_t        rx_queue_release; /**< Release RX queue. */
 	eth_rx_queue_count_t       rx_queue_count;
 	/**< Get the number of used RX descriptors. */
@@ -568,6 +586,8 @@  struct eth_dev_ops {
 	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue interrupt. */
 	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt. */
 	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue. */
+	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
+	/**< Set up device TX hairpin queue. */
 	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
 	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
 
@@ -639,6 +659,9 @@  struct eth_dev_ops {
 
 	eth_pool_ops_supported_t pool_ops_supported;
 	/**< Test if a port supports specific mempool ops */
+
+	eth_hairpin_cap_get_t hairpin_cap_get;
+	/**< Returns the hairpin capabilities. */
 };
 
 /**
@@ -746,9 +769,9 @@  struct rte_eth_dev_data {
 		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
 		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
 	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-			/**< Queues state: STARTED(1) / STOPPED(0). */
+		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
 	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-			/**< Queues state: STARTED(1) / STOPPED(0). */
+		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
 	uint32_t dev_flags;             /**< Capabilities. */
 	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
 	int numa_node;                  /**< NUMA node connection. */
diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
index 6df42a4..77b0a86 100644
--- a/lib/librte_ethdev/rte_ethdev_version.map
+++ b/lib/librte_ethdev/rte_ethdev_version.map
@@ -283,4 +283,9 @@  EXPERIMENTAL {
 
 	# added in 19.08
 	rte_eth_read_clock;
+
+	# added in 19.11
+	rte_eth_rx_hairpin_queue_setup;
+	rte_eth_tx_hairpin_queue_setup;
+	rte_eth_dev_hairpin_capability_get;
 };