[v3,05/10] net/cpfl: support hairpin queue setup and release

Message ID: 20230519073116.56749-6-beilei.xing@intel.com (mailing list archive)
State: Superseded, archived
Delegated to: Qi Zhang
Series: net/cpfl: add hairpin queue support

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Xing, Beilei May 19, 2023, 7:31 a.m. UTC
  From: Beilei Xing <beilei.xing@intel.com>

Support hairpin Rx/Tx queue setup and release.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Signed-off-by: Mingxia Liu <mingxia.liu@intel.com>
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
 drivers/net/cpfl/cpfl_ethdev.c          |   6 +
 drivers/net/cpfl/cpfl_ethdev.h          |  12 +
 drivers/net/cpfl/cpfl_rxtx.c            | 373 +++++++++++++++++++++++-
 drivers/net/cpfl/cpfl_rxtx.h            |  26 ++
 drivers/net/cpfl/cpfl_rxtx_vec_common.h |   4 +
 5 files changed, 420 insertions(+), 1 deletion(-)
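
For context, a minimal sketch of how an application reaches these new ops through the generic ethdev hairpin API (API names are from rte_ethdev.h; the wrapper function and its parameters are illustrative only, not part of this patch):

#include <rte_ethdev.h>

/* Bind one Rx/Tx hairpin queue pair on a stopped port; sketch only.
 * nb_desc must be a multiple of CPFL_ALIGN_RING_DESC and within
 * [CPFL_MIN_RING_DESC, CPFL_MAX_RING_DESC], as checked by this patch. */
static int
setup_hairpin_pair(uint16_t port_id, uint16_t peer_port_id,
		   uint16_t rxq_idx, uint16_t peer_txq_id,
		   uint16_t txq_idx, uint16_t peer_rxq_id,
		   uint16_t nb_desc)
{
	struct rte_eth_hairpin_conf conf = {
		.peer_count = 1,	/* this PMD accepts exactly one peer */
		.manual_bind = 1,	/* lands in cpfl_vport->p2p_manual_bind */
	};
	int ret;

	conf.peers[0].port = peer_port_id;
	conf.peers[0].queue = peer_txq_id;	/* Rx queue peers with a Tx queue */
	ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_idx, nb_desc, &conf);
	if (ret != 0)
		return ret;

	conf.peers[0].queue = peer_rxq_id;	/* Tx queue peers with an Rx queue */
	return rte_eth_tx_hairpin_queue_setup(port_id, txq_idx, nb_desc, &conf);
}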
  

Comments

Liu, Mingxia May 24, 2023, 9:01 a.m. UTC | #1
> +cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			    uint16_t nb_desc,
> +			    const struct rte_eth_hairpin_conf *conf)
> +{
> +	struct cpfl_vport *cpfl_vport =
> +	    (struct cpfl_vport *)dev->data->dev_private;
> +
> +	struct idpf_vport *vport = &cpfl_vport->base;
> +	struct idpf_adapter *adapter_base = vport->adapter;
> +	uint16_t logic_qid = cpfl_vport->nb_p2p_txq;
> +	struct cpfl_txq_hairpin_info *hairpin_info;
> +	struct idpf_hw *hw = &adapter_base->hw;
> +	struct cpfl_tx_queue *cpfl_txq;
> +	struct idpf_tx_queue *txq, *cq;
> +	const struct rte_memzone *mz;
> +	uint32_t ring_size;
> +	uint16_t peer_port, peer_q;
> +
> +	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
> +		PMD_INIT_LOG(ERR, "Only split queue model supports hairpin queue.");
> +		return -EINVAL;
> +	}
> +
> +	if (conf->peer_count != 1) {
> +		PMD_INIT_LOG(ERR, "Can't support Tx hairpin queue peer count %d", conf->peer_count);
> +		return -EINVAL;
> +	}
> +
> +	peer_port = conf->peers[0].port;
> +	peer_q = conf->peers[0].queue;
> +
> +	if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > CPFL_MAX_RING_DESC ||
> +	    nb_desc < CPFL_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
> +			     nb_desc);
> +		return -EINVAL;
> +	}
> +
> +	/* Free memory if needed. */
> +	if (dev->data->tx_queues[queue_idx]) {
> +		cpfl_tx_queue_release(dev->data->tx_queues[queue_idx]);
> +		dev->data->tx_queues[queue_idx] = NULL;
> +	}
> +
> +	/* Allocate the TX queue data structure. */
> +	cpfl_txq = rte_zmalloc_socket("cpfl hairpin txq",
> +				 sizeof(struct cpfl_tx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 SOCKET_ID_ANY);
> +	if (!cpfl_txq) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
> +		return -ENOMEM;
> +	}
> +
> +	txq = &cpfl_txq->base;
> +	hairpin_info = &cpfl_txq->hairpin_info;
> +	/* Txq ring length should be 2 times the Tx completion queue size. */
> +	txq->nb_tx_desc = nb_desc * 2;
> +	txq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.tx_start_qid, logic_qid);
> +	txq->port_id = dev->data->port_id;
> +	hairpin_info->hairpin_q = true;
> +	hairpin_info->peer_rxp = peer_port;
> +	hairpin_info->peer_rxq_id = peer_q;
> +
> +	if (conf->manual_bind != 0)
> +		cpfl_vport->p2p_manual_bind = true;
> +	else
> +		cpfl_vport->p2p_manual_bind = false;
> +
> +	/* Tx hairpin queue always allocates the Tx HW ring */
> +	ring_size = RTE_ALIGN(txq->nb_tx_desc * CPFL_P2P_DESC_LEN,
> +			      CPFL_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_ring", logic_qid,
> +				      ring_size + CPFL_P2P_RING_BUF,
> +				      CPFL_RING_BASE_ALIGN,
> +				      dev->device->numa_node);
> +	if (!mz) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	txq->tx_ring_phys_addr = mz->iova;
> +	txq->desc_ring = mz->addr;
> +	txq->mz = mz;
> +
> +	cpfl_tx_hairpin_descq_reset(txq);
> +	txq->qtx_tail = hw->hw_addr +
> +		cpfl_hw_qtail_get(cpfl_vport->p2p_q_chunks_info.tx_qtail_start,
> +				  logic_qid, cpfl_vport->p2p_q_chunks_info.tx_qtail_spacing);
> +	txq->ops = &def_txq_ops;
> +
> +	if (cpfl_vport->p2p_tx_complq == NULL) {
[Liu, Mingxia] In cpfl_rx_hairpin_queue_setup(), "logic_qid" is used to identify whether it is the first time the "p2p_rx_bufq" buffer is allocated.
Can the two paths be unified, using either logic_qid == 0 or p2p_tx_complq/p2p_rx_bufq == NULL in both?



  
Jingjing Wu May 25, 2023, 3:58 a.m. UTC | #2
> 
> +static int
> +cpfl_rx_hairpin_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
> +			   uint16_t logic_qid, uint16_t nb_desc)
> +{
> +	struct cpfl_vport *cpfl_vport =
> +	    (struct cpfl_vport *)dev->data->dev_private;
> +	struct idpf_vport *vport = &cpfl_vport->base;
> +	struct idpf_adapter *adapter = vport->adapter;
> +	struct rte_mempool *mp;
> +	char pool_name[RTE_MEMPOOL_NAMESIZE];
> +
> +	mp = cpfl_vport->p2p_mp;
> +	if (!mp) {
> +		snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "p2p_mb_pool_%u",
> +			 dev->data->port_id);
> +		mp = rte_pktmbuf_pool_create(pool_name, CPFL_P2P_NB_MBUF, CPFL_P2P_CACHE_SIZE,
> +					     0, CPFL_P2P_MBUF_SIZE, dev->device->numa_node);
> +		if (!mp) {
> +			PMD_INIT_LOG(ERR, "Failed to allocate mbuf pool for p2p");
> +			return -ENOMEM;
> +		}
> +		cpfl_vport->p2p_mp = mp;
> +	}
> +
> +	bufq->mp = mp;
> +	bufq->nb_rx_desc = nb_desc;
> +	bufq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_buf_start_qid, logic_qid);
> +	bufq->port_id = dev->data->port_id;
> +	bufq->adapter = adapter;
> +	bufq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
> +
> +	bufq->sw_ring = rte_zmalloc("sw ring",
> +				    sizeof(struct rte_mbuf *) * nb_desc,
> +				    RTE_CACHE_LINE_SIZE);

Is sw_ring required in the p2p case? It has never been used, right?
Please also check the sw_ring in the tx queue.

> +	if (!bufq->sw_ring) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
> +		return -ENOMEM;
> +	}
> +
> +	bufq->q_set = true;
> +	bufq->ops = &def_rxq_ops;
> +
> +	return 0;
> +}
> +
> +int
> +cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			    uint16_t nb_desc,
> +			    const struct rte_eth_hairpin_conf *conf)
> +{
> +	struct cpfl_vport *cpfl_vport = (struct cpfl_vport *)dev->data->dev_private;
> +	struct idpf_vport *vport = &cpfl_vport->base;
> +	struct idpf_adapter *adapter_base = vport->adapter;
> +	uint16_t logic_qid = cpfl_vport->nb_p2p_rxq;
> +	struct cpfl_rxq_hairpin_info *hairpin_info;
> +	struct cpfl_rx_queue *cpfl_rxq;
> +	struct idpf_rx_queue *bufq1 = NULL;
> +	struct idpf_rx_queue *rxq;
> +	uint16_t peer_port, peer_q;
> +	uint16_t qid;
> +	int ret;
> +
> +	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
> +		PMD_INIT_LOG(ERR, "Only split queue model supports hairpin queue.");
> +		return -EINVAL;
> +	}
> +
> +	if (conf->peer_count != 1) {
> +		PMD_INIT_LOG(ERR, "Can't support Rx hairpin queue peer count %d", conf->peer_count);
> +		return -EINVAL;
> +	}
> +
> +	peer_port = conf->peers[0].port;
> +	peer_q = conf->peers[0].queue;
> +
> +	if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > CPFL_MAX_RING_DESC ||
> +	    nb_desc < CPFL_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is invalid", nb_desc);
> +		return -EINVAL;
> +	}
> +
> +	/* Free memory if needed */
> +	if (dev->data->rx_queues[queue_idx]) {
> +		cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]);
> +		dev->data->rx_queues[queue_idx] = NULL;
> +	}
> +
> +	/* Setup Rx description queue */
> +	cpfl_rxq = rte_zmalloc_socket("cpfl hairpin rxq",
> +				 sizeof(struct cpfl_rx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 SOCKET_ID_ANY);
> +	if (!cpfl_rxq) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for rx queue data structure");
> +		return -ENOMEM;
> +	}
> +
> +	rxq = &cpfl_rxq->base;
> +	hairpin_info = &cpfl_rxq->hairpin_info;
> +	rxq->nb_rx_desc = nb_desc * 2;
> +	rxq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_start_qid, logic_qid);
> +	rxq->port_id = dev->data->port_id;
> +	rxq->adapter = adapter_base;
> +	rxq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
> +	hairpin_info->hairpin_q = true;
> +	hairpin_info->peer_txp = peer_port;
> +	hairpin_info->peer_txq_id = peer_q;
> +
> +	if (conf->manual_bind != 0)
> +		cpfl_vport->p2p_manual_bind = true;
> +	else
> +		cpfl_vport->p2p_manual_bind = false;
> +
> +	/* setup 1 Rx buffer queue for the 1st hairpin rxq */
> +	if (logic_qid == 0) {
> +		bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
> +					   sizeof(struct idpf_rx_queue),
> +					   RTE_CACHE_LINE_SIZE,
> +					   SOCKET_ID_ANY);
> +		if (!bufq1) {
> +			PMD_INIT_LOG(ERR, "Failed to allocate memory for hairpin Rx buffer queue 1.");
> +			ret = -ENOMEM;
> +			goto err_alloc_bufq1;
> +		}
> +		qid = 2 * logic_qid;

Inside the branch (if (logic_qid == 0) { }), logic_qid must be zero, right? So what is the purpose of computing qid = 2 * logic_qid?

> +		ret = cpfl_rx_hairpin_bufq_setup(dev, bufq1, qid, nb_desc);
> +		if (ret) {
> +			PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer queue 1");
> +			ret = -EINVAL;
> +			goto err_setup_bufq1;
> +		}
> +		cpfl_vport->p2p_rx_bufq = bufq1;
> +	}
> +
> +	rxq->bufq1 = cpfl_vport->p2p_rx_bufq;
> +	rxq->bufq2 = NULL;
> +

cpfl_vport->p2p_rx_bufq is allocated in this function, but I haven't seen where it will be released.
And in cpfl_rx_hairpin_bufq_reset() the rxq->bufq1 will be set to NULL. Will queue release miss this?

> +	cpfl_vport->nb_p2p_rxq++;
> +	rxq->q_set = true;
> +	dev->data->rx_queues[queue_idx] = cpfl_rxq;
> +
> +	return 0;
> +
> +err_setup_bufq1:
> +	rte_free(bufq1);
> +err_alloc_bufq1:
> +	rte_free(rxq);
> +
> +	return ret;
> +}
> +
  
Xing, Beilei May 26, 2023, 3:46 a.m. UTC | #3
> -----Original Message-----
> From: Liu, Mingxia <mingxia.liu@intel.com>
> Sent: Wednesday, May 24, 2023 5:02 PM
> To: Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Cc: dev@dpdk.org; Wang, Xiao W <xiao.w.wang@intel.com>
> Subject: RE: [PATCH v3 05/10] net/cpfl: support hairpin queue setup and release
> 
> > [...]
> > +	txq->ops = &def_txq_ops;
> > +
> > +	if (cpfl_vport->p2p_tx_complq == NULL) {
> [Liu, Mingxia] In cpfl_rx_hairpin_queue_setup(), "logic_qid" is used to identify whether
> it is the first time the "p2p_rx_bufq" buffer is allocated. Can the two paths be unified,
> using either logic_qid == 0 or p2p_tx_complq/p2p_rx_bufq == NULL in both?
> 
> 
Yes, thanks for the catch.
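
For illustration, a unified shape for both setup paths could look like this (sketch only, reusing the names from this patch):

	/* Rx setup: one Rx buffer queue is shared by all hairpin rxqs */
	if (cpfl_vport->p2p_rx_bufq == NULL) {
		/* allocate and set up bufq1 here, then publish it */
		cpfl_vport->p2p_rx_bufq = bufq1;
	}

	/* Tx setup: one Tx completion queue is shared by all hairpin txqs */
	if (cpfl_vport->p2p_tx_complq == NULL) {
		/* allocate and set up cq here, then publish it */
		cpfl_vport->p2p_tx_complq = cq;
	}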

  
Xing, Beilei May 26, 2023, 3:52 a.m. UTC | #4
> -----Original Message-----
> From: Wu, Jingjing <jingjing.wu@intel.com>
> Sent: Thursday, May 25, 2023 11:59 AM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Liu, Mingxia <mingxia.liu@intel.com>; Wang, Xiao W <xiao.w.wang@intel.com>
> Subject: RE: [PATCH v3 05/10] net/cpfl: support hairpin queue setup and release
> 
> >
> > [...]
> > +
> > +	bufq->sw_ring = rte_zmalloc("sw ring",
> > +				    sizeof(struct rte_mbuf *) * nb_desc,
> > +				    RTE_CACHE_LINE_SIZE);
> 
> Is sw_ring required in the p2p case? It has never been used, right?
> Please also check the sw_ring in the tx queue.
Yes, it should be removed.
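
A sketch of the end of cpfl_rx_hairpin_bufq_setup() with the unused SW ring dropped (assumption: nothing in the p2p data path ever touches bufq->sw_ring):

	bufq->mp = mp;
	bufq->nb_rx_desc = nb_desc;
	bufq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_buf_start_qid,
					 logic_qid);
	bufq->port_id = dev->data->port_id;
	bufq->adapter = adapter;
	bufq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
	/* no sw_ring allocation: the hairpin data path is handled by HW, so the
	 * driver never posts or tracks per-descriptor mbufs for this queue */
	bufq->q_set = true;
	bufq->ops = &def_rxq_ops;

	return 0;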

> 
> > +	if (!bufq->sw_ring) {
> > +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	bufq->q_set = true;
> > +	bufq->ops = &def_rxq_ops;
> > +
> > +	return 0;
> > +}
> > +
> > +int
> > +cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> > +			    uint16_t nb_desc,
> > +			    const struct rte_eth_hairpin_conf *conf) {
> > +	struct cpfl_vport *cpfl_vport = (struct cpfl_vport *)dev->data-
> >dev_private;
> > +	struct idpf_vport *vport = &cpfl_vport->base;
> > +	struct idpf_adapter *adapter_base = vport->adapter;
> > +	uint16_t logic_qid = cpfl_vport->nb_p2p_rxq;
> > +	struct cpfl_rxq_hairpin_info *hairpin_info;
> > +	struct cpfl_rx_queue *cpfl_rxq;
> > +	struct idpf_rx_queue *bufq1 = NULL;
> > +	struct idpf_rx_queue *rxq;
> > +	uint16_t peer_port, peer_q;
> > +	uint16_t qid;
> > +	int ret;
> > +
> > +	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
> > +		PMD_INIT_LOG(ERR, "Only spilt queue model supports hairpin
> queue.");
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (conf->peer_count != 1) {
> > +		PMD_INIT_LOG(ERR, "Can't support Rx hairpin queue peer
> count %d",
> > conf->peer_count);
> > +		return -EINVAL;
> > +	}
> > +
> > +	peer_port = conf->peers[0].port;
> > +	peer_q = conf->peers[0].queue;
> > +
> > +	if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
> > +	    nb_desc > CPFL_MAX_RING_DESC ||
> > +	    nb_desc < CPFL_MIN_RING_DESC) {
> > +		PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is
> invalid",
> > nb_desc);
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Free memory if needed */
> > +	if (dev->data->rx_queues[queue_idx]) {
> > +		cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]);
> > +		dev->data->rx_queues[queue_idx] = NULL;
> > +	}
> > +
> > +	/* Setup Rx description queue */
> > +	cpfl_rxq = rte_zmalloc_socket("cpfl hairpin rxq",
> > +				 sizeof(struct cpfl_rx_queue),
> > +				 RTE_CACHE_LINE_SIZE,
> > +				 SOCKET_ID_ANY);
> > +	if (!cpfl_rxq) {
> > +		PMD_INIT_LOG(ERR, "Failed to allocate memory for rx queue
> data
> > structure");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	rxq = &cpfl_rxq->base;
> > +	hairpin_info = &cpfl_rxq->hairpin_info;
> > +	rxq->nb_rx_desc = nb_desc * 2;
> > +	rxq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_start_qid, logic_qid);
> > +	rxq->port_id = dev->data->port_id;
> > +	rxq->adapter = adapter_base;
> > +	rxq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
> > +	hairpin_info->hairpin_q = true;
> > +	hairpin_info->peer_txp = peer_port;
> > +	hairpin_info->peer_txq_id = peer_q;
> > +
> > +	if (conf->manual_bind != 0)
> > +		cpfl_vport->p2p_manual_bind = true;
> > +	else
> > +		cpfl_vport->p2p_manual_bind = false;
> > +
> > +	/* setup 1 Rx buffer queue for the 1st hairpin rxq */
> > +	if (logic_qid == 0) {
> > +		bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
> > +					   sizeof(struct idpf_rx_queue),
> > +					   RTE_CACHE_LINE_SIZE,
> > +					   SOCKET_ID_ANY);
> > +		if (!bufq1) {
> > +			PMD_INIT_LOG(ERR, "Failed to allocate memory for hairpin Rx buffer queue 1.");
> > +			ret = -ENOMEM;
> > +			goto err_alloc_bufq1;
> > +		}
> > +		qid = 2 * logic_qid;
> 
> Inside the braces (if (logic_qid == 0) { ... }), logic_qid must be zero,
> right? So what is the purpose of computing qid = 2 * logic_qid?
The if condition should be refined.
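
Something like the following sketch, keying the guard on the shared buffer
queue itself rather than on logic_qid arithmetic (the qid mapping is kept as
a direct pass-through here purely for illustration; the exact v4 shape and
HW qid mapping are to be confirmed):

	/* The p2p Rx queues share a single buffer queue; create it only
	 * for the first hairpin Rx queue and reuse it afterwards.
	 */
	if (cpfl_vport->p2p_rx_bufq == NULL) {
		bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
					   sizeof(struct idpf_rx_queue),
					   RTE_CACHE_LINE_SIZE,
					   SOCKET_ID_ANY);
		if (!bufq1) {
			PMD_INIT_LOG(ERR, "Failed to allocate memory for hairpin Rx buffer queue 1.");
			ret = -ENOMEM;
			goto err_alloc_bufq1;
		}
		/* logic_qid is 0 on this path, so pass it through as-is. */
		ret = cpfl_rx_hairpin_bufq_setup(dev, bufq1, logic_qid, nb_desc);
		if (ret) {
			PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer queue 1");
			ret = -EINVAL;
			goto err_setup_bufq1;
		}
		cpfl_vport->p2p_rx_bufq = bufq1;
	}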

> 
> > +		ret = cpfl_rx_hairpin_bufq_setup(dev, bufq1, qid, nb_desc);
> > +		if (ret) {
> > +			PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer queue 1");
> > +			ret = -EINVAL;
> > +			goto err_setup_bufq1;
> > +		}
> > +		cpfl_vport->p2p_rx_bufq = bufq1;
> > +	}
> > +
> > +	rxq->bufq1 = cpfl_vport->p2p_rx_bufq;
> > +	rxq->bufq2 = NULL;
> > +
> 
> cpfl_vport->p2p_rx_bufq is allocated in this function, but I haven't seen
> where it is released.

It will be released in the cpfl_rx_queue_release function.
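
For reference, a sketch of what that release branch could look like (names
are from this series; its exact placement is an assumption). Since
CPFL_MAX_HAIRPINQ_RX_2_TX is 1, there is a single hairpin Rx queue, so the
shared buffer queue can be freed here exactly once:

	/* In cpfl_rx_queue_release(): free the shared p2p buffer queue
	 * through q->bufq1; this relies on cpfl_rx_hairpin_bufq_reset()
	 * not NULLing rxq->bufq1 before release runs (or on release
	 * consulting the vport's p2p_rx_bufq reference instead).
	 */
	if (cpfl_rxq->hairpin_info.hairpin_q) {
		if (q->bufq1 != NULL) {
			rte_memzone_free(q->bufq1->mz);
			rte_free(q->bufq1);
		}
		rte_memzone_free(q->mz);
		rte_free(cpfl_rxq);
		return;
	}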

> And in cpfl_rx_hairpin_bufq_reset(), rxq->bufq1 is assigned NULL.
> Will the queue release path miss it because of that?
> 
> > +	cpfl_vport->nb_p2p_rxq++;
> > +	rxq->q_set = true;
> > +	dev->data->rx_queues[queue_idx] = cpfl_rxq;
> > +
> > +	return 0;
> > +
> > +err_setup_bufq1:
> > +	rte_free(bufq1);
> > +err_alloc_bufq1:
> > +	rte_free(rxq);
> > +
> > +	return ret;
> > +}
> > +
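
For completeness, a minimal application-side sketch of driving these new ops
through the standard ethdev hairpin API (the two-port manual-bind flow below
is an assumption for illustration, not part of this patch; q is the hairpin
queue index, following the regular data queues):

#include <rte_ethdev.h>

static int
setup_hairpin(uint16_t rx_port, uint16_t tx_port, uint16_t q, uint16_t nb_desc)
{
	struct rte_eth_hairpin_conf conf = {
		.peer_count = 1,
		.manual_bind = 1,	/* lands in cpfl_vport->p2p_manual_bind */
		.peers[0] = { .port = tx_port, .queue = q },
	};
	int ret;

	/* nb_desc must satisfy CPFL_MIN/MAX_RING_DESC and the alignment check. */
	ret = rte_eth_rx_hairpin_queue_setup(rx_port, q, nb_desc, &conf);
	if (ret != 0)
		return ret;

	conf.peers[0].port = rx_port;
	ret = rte_eth_tx_hairpin_queue_setup(tx_port, q, nb_desc, &conf);
	if (ret != 0)
		return ret;

	/* With manual_bind set, the application binds the ports itself
	 * after both have been started.
	 */
	return rte_eth_hairpin_bind(tx_port, rx_port);
}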
  

Patch

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index 8e471d2a9b..03813716ce 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -874,6 +874,10 @@  cpfl_dev_close(struct rte_eth_dev *dev)
 	struct cpfl_adapter_ext *adapter = CPFL_ADAPTER_TO_EXT(vport->adapter);
 
 	cpfl_dev_stop(dev);
+	if (cpfl_vport->p2p_mp) {
+		rte_mempool_free(cpfl_vport->p2p_mp);
+		cpfl_vport->p2p_mp = NULL;
+	}
 
 	if (!adapter->base.is_rx_singleq && !adapter->base.is_tx_singleq)
 		cpfl_p2p_queue_grps_del(vport);
@@ -916,6 +920,8 @@  static const struct eth_dev_ops cpfl_eth_dev_ops = {
 	.xstats_get_names		= cpfl_dev_xstats_get_names,
 	.xstats_reset			= cpfl_dev_xstats_reset,
 	.hairpin_cap_get		= cpfl_hairpin_cap_get,
+	.rx_hairpin_queue_setup		= cpfl_rx_hairpin_queue_setup,
+	.tx_hairpin_queue_setup		= cpfl_tx_hairpin_queue_setup,
 };
 
 static int
diff --git a/drivers/net/cpfl/cpfl_ethdev.h b/drivers/net/cpfl/cpfl_ethdev.h
index 65c9a195b2..a48344299c 100644
--- a/drivers/net/cpfl/cpfl_ethdev.h
+++ b/drivers/net/cpfl/cpfl_ethdev.h
@@ -89,6 +89,18 @@  struct p2p_queue_chunks_info {
 struct cpfl_vport {
 	struct idpf_vport base;
 	struct p2p_queue_chunks_info p2p_q_chunks_info;
+
+	struct rte_mempool *p2p_mp;
+
+	uint16_t nb_data_rxq;
+	uint16_t nb_data_txq;
+	uint16_t nb_p2p_rxq;
+	uint16_t nb_p2p_txq;
+
+	struct idpf_rx_queue *p2p_rx_bufq;
+	struct idpf_tx_queue *p2p_tx_complq;
+	bool p2p_manual_bind;
+
 };
 
 struct cpfl_adapter_ext {
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 04a51b8d15..333a399e73 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -10,6 +10,79 @@ 
 #include "cpfl_rxtx.h"
 #include "cpfl_rxtx_vec_common.h"
 
+uint16_t
+cpfl_hw_qid_get(uint16_t start_qid, uint16_t offset)
+{
+	return start_qid + offset;
+}
+
+uint64_t
+cpfl_hw_qtail_get(uint64_t tail_start, uint16_t offset, uint64_t tail_spacing)
+{
+	return tail_start + offset * tail_spacing;
+}
+
+static inline void
+cpfl_tx_hairpin_descq_reset(struct idpf_tx_queue *txq)
+{
+	uint32_t i, size;
+
+	if (!txq) {
+		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
+		return;
+	}
+
+	size = txq->nb_tx_desc * CPFL_P2P_DESC_LEN;
+	for (i = 0; i < size; i++)
+		((volatile char *)txq->desc_ring)[i] = 0;
+}
+
+static inline void
+cpfl_tx_hairpin_complq_reset(struct idpf_tx_queue *cq)
+{
+	uint32_t i, size;
+
+	if (!cq) {
+		PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
+		return;
+	}
+
+	size = cq->nb_tx_desc * CPFL_P2P_DESC_LEN;
+	for (i = 0; i < size; i++)
+		((volatile char *)cq->compl_ring)[i] = 0;
+}
+
+static inline void
+cpfl_rx_hairpin_descq_reset(struct idpf_rx_queue *rxq)
+{
+	uint16_t len;
+	uint32_t i;
+
+	if (!rxq)
+		return;
+
+	len = rxq->nb_rx_desc;
+	for (i = 0; i < len * CPFL_P2P_DESC_LEN; i++)
+		((volatile char *)rxq->rx_ring)[i] = 0;
+}
+
+static inline void
+cpfl_rx_hairpin_bufq_reset(struct idpf_rx_queue *rxbq)
+{
+	uint16_t len;
+	uint32_t i;
+
+	if (!rxbq)
+		return;
+
+	len = rxbq->nb_rx_desc;
+	for (i = 0; i < len * CPFL_P2P_DESC_LEN; i++)
+		((volatile char *)rxbq->rx_ring)[i] = 0;
+
+	rxbq->bufq1 = NULL;
+	rxbq->bufq2 = NULL;
+}
+
 static uint64_t
 cpfl_rx_offload_convert(uint64_t offload)
 {
@@ -234,7 +307,10 @@  cpfl_rx_queue_release(void *rxq)
 
 	/* Split queue */
 	if (!q->adapter->is_rx_singleq) {
-		if (q->bufq2)
+		/* The mz is shared between the Tx/Rx hairpin queues; let the
+		 * Rx release path free the buffers, q->bufq1->mz and q->mz.
+		 */
+		if (!cpfl_rxq->hairpin_info.hairpin_q && q->bufq2)
 			cpfl_rx_split_bufq_release(q->bufq2);
 
 		if (q->bufq1)
@@ -385,6 +461,7 @@  cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		}
 	}
 
+	cpfl_vport->nb_data_rxq++;
 	rxq->q_set = true;
 	dev->data->rx_queues[queue_idx] = cpfl_rxq;
 
@@ -548,6 +625,7 @@  cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
 			queue_idx * vport->chunks_info.tx_qtail_spacing);
 	txq->ops = &def_txq_ops;
+	cpfl_vport->nb_data_txq++;
 	txq->q_set = true;
 	dev->data->tx_queues[queue_idx] = cpfl_txq;
 
@@ -562,6 +640,297 @@  cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	return ret;
 }
 
+static int
+cpfl_rx_hairpin_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *bufq,
+			   uint16_t logic_qid, uint16_t nb_desc)
+{
+	struct cpfl_vport *cpfl_vport =
+	    (struct cpfl_vport *)dev->data->dev_private;
+	struct idpf_vport *vport = &cpfl_vport->base;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct rte_mempool *mp;
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+
+	mp = cpfl_vport->p2p_mp;
+	if (!mp) {
+		snprintf(pool_name, RTE_MEMPOOL_NAMESIZE, "p2p_mb_pool_%u",
+			 dev->data->port_id);
+		mp = rte_pktmbuf_pool_create(pool_name, CPFL_P2P_NB_MBUF, CPFL_P2P_CACHE_SIZE,
+					     0, CPFL_P2P_MBUF_SIZE, dev->device->numa_node);
+		if (!mp) {
+			PMD_INIT_LOG(ERR, "Failed to allocate mbuf pool for p2p");
+			return -ENOMEM;
+		}
+		cpfl_vport->p2p_mp = mp;
+	}
+
+	bufq->mp = mp;
+	bufq->nb_rx_desc = nb_desc;
+	bufq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_buf_start_qid, logic_qid);
+	bufq->port_id = dev->data->port_id;
+	bufq->adapter = adapter;
+	bufq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
+
+	bufq->sw_ring = rte_zmalloc("sw ring",
+				    sizeof(struct rte_mbuf *) * nb_desc,
+				    RTE_CACHE_LINE_SIZE);
+	if (!bufq->sw_ring) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
+		return -ENOMEM;
+	}
+
+	bufq->q_set = true;
+	bufq->ops = &def_rxq_ops;
+
+	return 0;
+}
+
+int
+cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			    uint16_t nb_desc,
+			    const struct rte_eth_hairpin_conf *conf)
+{
+	struct cpfl_vport *cpfl_vport = (struct cpfl_vport *)dev->data->dev_private;
+	struct idpf_vport *vport = &cpfl_vport->base;
+	struct idpf_adapter *adapter_base = vport->adapter;
+	uint16_t logic_qid = cpfl_vport->nb_p2p_rxq;
+	struct cpfl_rxq_hairpin_info *hairpin_info;
+	struct cpfl_rx_queue *cpfl_rxq;
+	struct idpf_rx_queue *bufq1 = NULL;
+	struct idpf_rx_queue *rxq;
+	uint16_t peer_port, peer_q;
+	uint16_t qid;
+	int ret;
+
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
+		PMD_INIT_LOG(ERR, "Only split queue model supports hairpin queue.");
+		return -EINVAL;
+	}
+
+	if (conf->peer_count != 1) {
+		PMD_INIT_LOG(ERR, "Can't support Rx hairpin queue peer count %d", conf->peer_count);
+		return -EINVAL;
+	}
+
+	peer_port = conf->peers[0].port;
+	peer_q = conf->peers[0].queue;
+
+	if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
+	    nb_desc > CPFL_MAX_RING_DESC ||
+	    nb_desc < CPFL_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is invalid", nb_desc);
+		return -EINVAL;
+	}
+
+	/* Free memory if needed */
+	if (dev->data->rx_queues[queue_idx]) {
+		cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]);
+		dev->data->rx_queues[queue_idx] = NULL;
+	}
+
+	/* Set up the Rx descriptor queue */
+	cpfl_rxq = rte_zmalloc_socket("cpfl hairpin rxq",
+				 sizeof(struct cpfl_rx_queue),
+				 RTE_CACHE_LINE_SIZE,
+				 SOCKET_ID_ANY);
+	if (!cpfl_rxq) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for rx queue data structure");
+		return -ENOMEM;
+	}
+
+	rxq = &cpfl_rxq->base;
+	hairpin_info = &cpfl_rxq->hairpin_info;
+	rxq->nb_rx_desc = nb_desc * 2;
+	rxq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.rx_start_qid, logic_qid);
+	rxq->port_id = dev->data->port_id;
+	rxq->adapter = adapter_base;
+	rxq->rx_buf_len = CPFL_P2P_MBUF_SIZE - RTE_PKTMBUF_HEADROOM;
+	hairpin_info->hairpin_q = true;
+	hairpin_info->peer_txp = peer_port;
+	hairpin_info->peer_txq_id = peer_q;
+
+	if (conf->manual_bind != 0)
+		cpfl_vport->p2p_manual_bind = true;
+	else
+		cpfl_vport->p2p_manual_bind = false;
+
+	/* setup 1 Rx buffer queue for the 1st hairpin rxq */
+	if (logic_qid == 0) {
+		bufq1 = rte_zmalloc_socket("hairpin rx bufq1",
+					   sizeof(struct idpf_rx_queue),
+					   RTE_CACHE_LINE_SIZE,
+					   SOCKET_ID_ANY);
+		if (!bufq1) {
+			PMD_INIT_LOG(ERR, "Failed to allocate memory for hairpin Rx buffer queue 1.");
+			ret = -ENOMEM;
+			goto err_alloc_bufq1;
+		}
+		qid = 2 * logic_qid;
+		ret = cpfl_rx_hairpin_bufq_setup(dev, bufq1, qid, nb_desc);
+		if (ret) {
+			PMD_INIT_LOG(ERR, "Failed to setup hairpin Rx buffer queue 1");
+			ret = -EINVAL;
+			goto err_setup_bufq1;
+		}
+		cpfl_vport->p2p_rx_bufq = bufq1;
+	}
+
+	rxq->bufq1 = cpfl_vport->p2p_rx_bufq;
+	rxq->bufq2 = NULL;
+
+	cpfl_vport->nb_p2p_rxq++;
+	rxq->q_set = true;
+	dev->data->rx_queues[queue_idx] = cpfl_rxq;
+
+	return 0;
+
+err_setup_bufq1:
+	rte_free(bufq1);
+err_alloc_bufq1:
+	rte_free(rxq);
+
+	return ret;
+}
+
+int
+cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			    uint16_t nb_desc,
+			    const struct rte_eth_hairpin_conf *conf)
+{
+	struct cpfl_vport *cpfl_vport =
+	    (struct cpfl_vport *)dev->data->dev_private;
+
+	struct idpf_vport *vport = &cpfl_vport->base;
+	struct idpf_adapter *adapter_base = vport->adapter;
+	uint16_t logic_qid = cpfl_vport->nb_p2p_txq;
+	struct cpfl_txq_hairpin_info *hairpin_info;
+	struct idpf_hw *hw = &adapter_base->hw;
+	struct cpfl_tx_queue *cpfl_txq;
+	struct idpf_tx_queue *txq, *cq;
+	const struct rte_memzone *mz;
+	uint32_t ring_size;
+	uint16_t peer_port, peer_q;
+
+	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) {
+		PMD_INIT_LOG(ERR, "Only split queue model supports hairpin queue.");
+		return -EINVAL;
+	}
+
+	if (conf->peer_count != 1) {
+		PMD_INIT_LOG(ERR, "Can't support Tx hairpin queue peer count %d", conf->peer_count);
+		return -EINVAL;
+	}
+
+	peer_port = conf->peers[0].port;
+	peer_q = conf->peers[0].queue;
+
+	if (nb_desc % CPFL_ALIGN_RING_DESC != 0 ||
+	    nb_desc > CPFL_MAX_RING_DESC ||
+	    nb_desc < CPFL_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
+			     nb_desc);
+		return -EINVAL;
+	}
+
+	/* Free memory if needed. */
+	if (dev->data->tx_queues[queue_idx]) {
+		cpfl_tx_queue_release(dev->data->tx_queues[queue_idx]);
+		dev->data->tx_queues[queue_idx] = NULL;
+	}
+
+	/* Allocate the TX queue data structure. */
+	cpfl_txq = rte_zmalloc_socket("cpfl hairpin txq",
+				 sizeof(struct cpfl_tx_queue),
+				 RTE_CACHE_LINE_SIZE,
+				 SOCKET_ID_ANY);
+	if (!cpfl_txq) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		return -ENOMEM;
+	}
+
+	txq = &cpfl_txq->base;
+	hairpin_info = &cpfl_txq->hairpin_info;
+	/* Txq ring length should be twice the Tx completion queue size. */
+	txq->nb_tx_desc = nb_desc * 2;
+	txq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.tx_start_qid, logic_qid);
+	txq->port_id = dev->data->port_id;
+	hairpin_info->hairpin_q = true;
+	hairpin_info->peer_rxp = peer_port;
+	hairpin_info->peer_rxq_id = peer_q;
+
+	if (conf->manual_bind != 0)
+		cpfl_vport->p2p_manual_bind = true;
+	else
+		cpfl_vport->p2p_manual_bind = false;
+
+	/* The Tx hairpin queue always allocates the Tx HW ring */
+	ring_size = RTE_ALIGN(txq->nb_tx_desc * CPFL_P2P_DESC_LEN,
+			      CPFL_DMA_MEM_ALIGN);
+	mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_ring", logic_qid,
+				      ring_size + CPFL_P2P_RING_BUF,
+				      CPFL_RING_BASE_ALIGN,
+				      dev->device->numa_node);
+	if (!mz) {
+		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
+		rte_free(txq->sw_ring);
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	txq->tx_ring_phys_addr = mz->iova;
+	txq->desc_ring = mz->addr;
+	txq->mz = mz;
+
+	cpfl_tx_hairpin_descq_reset(txq);
+	txq->qtx_tail = hw->hw_addr +
+		cpfl_hw_qtail_get(cpfl_vport->p2p_q_chunks_info.tx_qtail_start,
+				  logic_qid, cpfl_vport->p2p_q_chunks_info.tx_qtail_spacing);
+	txq->ops = &def_txq_ops;
+
+	if (cpfl_vport->p2p_tx_complq == NULL) {
+		cq = rte_zmalloc_socket("cpfl hairpin cq",
+					sizeof(struct idpf_tx_queue),
+					RTE_CACHE_LINE_SIZE,
+					dev->device->numa_node);
+		if (!cq) {
+			PMD_INIT_LOG(ERR, "Failed to allocate memory for Tx completion queue structure");
+			return -ENOMEM;
+		}
+
+		cq->nb_tx_desc = nb_desc;
+		cq->queue_id = cpfl_hw_qid_get(cpfl_vport->p2p_q_chunks_info.tx_compl_start_qid, 0);
+		cq->port_id = dev->data->port_id;
+
+		/* Tx completion queue always allocates the HW ring */
+		ring_size = RTE_ALIGN(cq->nb_tx_desc * CPFL_P2P_DESC_LEN,
+				      CPFL_DMA_MEM_ALIGN);
+		mz = rte_eth_dma_zone_reserve(dev, "hairpin_tx_compl_ring", logic_qid,
+					      ring_size + CPFL_P2P_RING_BUF,
+					      CPFL_RING_BASE_ALIGN,
+					      dev->device->numa_node);
+		if (!mz) {
+			PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
+			rte_free(txq->sw_ring);
+			rte_free(txq);
+			return -ENOMEM;
+		}
+		cq->tx_ring_phys_addr = mz->iova;
+		cq->compl_ring = mz->addr;
+		cq->mz = mz;
+
+		cpfl_tx_hairpin_complq_reset(cq);
+		cpfl_vport->p2p_tx_complq = cq;
+	}
+
+	txq->complq = cpfl_vport->p2p_tx_complq;
+
+	cpfl_vport->nb_p2p_txq++;
+	txq->q_set = true;
+	dev->data->tx_queues[queue_idx] = cpfl_txq;
+
+	return 0;
+}
+
 int
 cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
@@ -865,6 +1234,8 @@  cpfl_set_rx_function(struct rte_eth_dev *dev)
 		if (vport->rx_vec_allowed) {
 			for (i = 0; i < dev->data->nb_rx_queues; i++) {
 				cpfl_rxq = dev->data->rx_queues[i];
+				if (cpfl_rxq->hairpin_info.hairpin_q)
+					continue;
 				(void)idpf_qc_splitq_rx_vec_setup(&cpfl_rxq->base);
 			}
 #ifdef CC_AVX512_SUPPORT
diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h
index 3a87a1f4b3..5e9f2dada7 100644
--- a/drivers/net/cpfl/cpfl_rxtx.h
+++ b/drivers/net/cpfl/cpfl_rxtx.h
@@ -13,6 +13,7 @@ 
 #define CPFL_MIN_RING_DESC	32
 #define CPFL_MAX_RING_DESC	4096
 #define CPFL_DMA_MEM_ALIGN	4096
+#define CPFL_P2P_DESC_LEN		16
 #define CPFL_MAX_HAIRPINQ_RX_2_TX	1
 #define CPFL_MAX_HAIRPINQ_TX_2_RX	1
 #define CPFL_MAX_HAIRPINQ_NB_DESC	1024
@@ -21,6 +22,10 @@ 
 #define CPFL_P2P_NB_TX_COMPLQ		1
 #define CPFL_P2P_NB_QUEUE_GRPS		1
 #define CPFL_P2P_QUEUE_GRP_ID		1
+#define CPFL_P2P_NB_MBUF		4096
+#define CPFL_P2P_CACHE_SIZE		250
+#define CPFL_P2P_MBUF_SIZE		2048
+#define CPFL_P2P_RING_BUF		128
 /* Base address of the HW descriptor ring should be 128B aligned. */
 #define CPFL_RING_BASE_ALIGN	128
 
@@ -31,12 +36,26 @@ 
 
 #define CPFL_SUPPORT_CHAIN_NUM 5
 
+struct cpfl_rxq_hairpin_info {
+	bool hairpin_q;		/* if rx queue is a hairpin queue */
+	uint16_t peer_txp;
+	uint16_t peer_txq_id;
+};
+
 struct cpfl_rx_queue {
 	struct idpf_rx_queue base;
+	struct cpfl_rxq_hairpin_info hairpin_info;
+};
+
+struct cpfl_txq_hairpin_info {
+	bool hairpin_q;		/* if tx queue is a hairpin queue */
+	uint16_t peer_rxp;
+	uint16_t peer_rxq_id;
 };
 
 struct cpfl_tx_queue {
 	struct idpf_tx_queue base;
+	struct cpfl_txq_hairpin_info hairpin_info;
 };
 
 int cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
@@ -57,4 +76,11 @@  void cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
 void cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
 void cpfl_set_rx_function(struct rte_eth_dev *dev);
 void cpfl_set_tx_function(struct rte_eth_dev *dev);
+uint16_t cpfl_hw_qid_get(uint16_t start_qid, uint16_t offset);
+uint64_t cpfl_hw_qtail_get(uint64_t tail_start, uint16_t offset, uint64_t tail_spacing);
+int cpfl_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+				uint16_t nb_desc, const struct rte_eth_hairpin_conf *conf);
+int cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+				uint16_t nb_desc,
+				const struct rte_eth_hairpin_conf *conf);
 #endif /* _CPFL_RXTX_H_ */
diff --git a/drivers/net/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
index 5690b17911..d8e9191196 100644
--- a/drivers/net/cpfl/cpfl_rxtx_vec_common.h
+++ b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
@@ -85,6 +85,8 @@  cpfl_rx_vec_dev_check_default(struct rte_eth_dev *dev)
 		cpfl_rxq = dev->data->rx_queues[i];
 		default_ret = cpfl_rx_vec_queue_default(&cpfl_rxq->base);
 		if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+			if (cpfl_rxq->hairpin_info.hairpin_q)
+				continue;
 			splitq_ret = cpfl_rx_splitq_vec_default(&cpfl_rxq->base);
 			ret = splitq_ret && default_ret;
 		} else {
@@ -106,6 +108,8 @@  cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev)
 
 	for (i = 0; i < dev->data->nb_tx_queues; i++) {
 		cpfl_txq = dev->data->tx_queues[i];
+		if (cpfl_txq->hairpin_info.hairpin_q)
+			continue;
 		ret = cpfl_tx_vec_queue_default(&cpfl_txq->base);
 		if (ret == CPFL_SCALAR_PATH)
 			return CPFL_SCALAR_PATH;