[v11,03/18] net/idpf: add Tx queue setup

Message ID: 20221024131227.1062446-4-junfeng.guo@intel.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Andrew Rybchenko
Series: add support for idpf PMD in DPDK

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Junfeng Guo Oct. 24, 2022, 1:12 p.m. UTC
  Add support for tx_queue_setup ops.

In the single queue model, the same descriptor queue is used by SW to
post buffer descriptors to HW and by HW to post completed descriptors
to SW.

In the split queue model, "RX buffer queues" are used to pass
descriptor buffers from SW to HW while Rx queues are used only to
pass the descriptor completions, that is, descriptors that point
to completed buffers, from HW to SW. This is contrary to the single
queue model in which Rx queues are used for both purposes.
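
For context, a minimal sketch (not part of this patch) of how an application reaches the new op through the generic ethdev API; the port id, queue id, descriptor count and thresholds below are arbitrary example values:

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>

	/* Arbitrary example values; any valid port/queue/descriptor count works. */
	struct rte_eth_txconf txconf = {
		.tx_rs_thresh = 32,
		.tx_free_thresh = 32,
	};
	int ret;

	/* Ends up in the PMD's tx_queue_setup op registered in idpf_eth_dev_ops. */
	ret = rte_eth_tx_queue_setup(0 /* port_id */, 0 /* queue_id */,
				     512 /* nb_tx_desc */, rte_socket_id(),
				     &txconf);
	if (ret != 0)
		printf("tx_queue_setup failed: %d\n", ret);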

Signed-off-by: Beilei Xing <beilei.xing@intel.com>
Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
---
 drivers/net/idpf/idpf_ethdev.c |  18 +-
 drivers/net/idpf/idpf_rxtx.c   | 371 +++++++++++++++++++++++++++++++++
 drivers/net/idpf/idpf_rxtx.h   |  76 +++++++
 drivers/net/idpf/meson.build   |   1 +
 4 files changed, 464 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/idpf/idpf_rxtx.c
 create mode 100644 drivers/net/idpf/idpf_rxtx.h
  

Comments

Andrew Rybchenko Oct. 25, 2022, 9:40 a.m. UTC | #1
On 10/24/22 16:12, Junfeng Guo wrote:
> Add support for tx_queue_setup ops.
> 
> In the single queue model, the same descriptor queue is used by SW to
> post buffer descriptors to HW and by HW to post completed descriptors
> to SW.
> 
> In the split queue model, "RX buffer queues" are used to pass
> descriptor buffers from SW to HW while Rx queues are used only to
> pass the descriptor completions, that is, descriptors that point
> to completed buffers, from HW to SW. This is contrary to the single
> queue model in which Rx queues are used for both purposes.
> 
> Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>

[snip]

> diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
> index 75695085f8..c0307128be 100644
> --- a/drivers/net/idpf/idpf_ethdev.c
> +++ b/drivers/net/idpf/idpf_ethdev.c
> @@ -10,6 +10,7 @@
>   #include <rte_dev.h>
>   
>   #include "idpf_ethdev.h"
> +#include "idpf_rxtx.h"
>   
>   #define IDPF_TX_SINGLE_Q	"tx_single"
>   #define IDPF_RX_SINGLE_Q	"rx_single"
> @@ -36,6 +37,7 @@ static void idpf_adapter_rel(struct idpf_adapter *adapter);
>   static const struct eth_dev_ops idpf_eth_dev_ops = {
>   	.dev_configure			= idpf_dev_configure,
>   	.dev_close			= idpf_dev_close,
> +	.tx_queue_setup			= idpf_tx_queue_setup,
>   	.dev_infos_get			= idpf_dev_info_get,
>   };
>   
> @@ -54,11 +56,23 @@ idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>   	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
>   
>   	dev_info->max_mac_addrs = IDPF_NUM_MACADDR_MAX;
> -	dev_info->dev_capa = 0;
> +	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;

I've played with two Intel PMDs, i40e and ice, and both have
bugs in runtime queue setup. That's why I'm trying to review
it carefully in a new driver. So, I'd like to repeat once
again: the feature does not make sense without device start
support, since the corresponding code cannot be in place and
cannot be reviewed here.


>   	dev_info->rx_offload_capa = 0;
> -

Unrelated change

>   	dev_info->tx_offload_capa = 0;
>   
> +	dev_info->default_txconf = (struct rte_eth_txconf) {
> +		.tx_free_thresh = IDPF_DEFAULT_TX_FREE_THRESH,
> +		.tx_rs_thresh = IDPF_DEFAULT_TX_RS_THRESH,
> +		.offloads = 0,

There is no necessity to initialize to zero explicitly since
the compiler does it for you implicitly.

> +	};
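
To illustrate the reviewer's point with a generic C note (not taken from the patch): members omitted from a designated initializer are zero-initialized, so the explicit .offloads = 0 can simply be dropped:

	/* Members not named in the designated initializer are implicitly 0,
	 * so .offloads does not need to be spelled out.
	 */
	dev_info->default_txconf = (struct rte_eth_txconf) {
		.tx_free_thresh = IDPF_DEFAULT_TX_FREE_THRESH,
		.tx_rs_thresh = IDPF_DEFAULT_TX_RS_THRESH,
	};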
> +
> +	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
> +		.nb_max = IDPF_MAX_RING_DESC,
> +		.nb_min = IDPF_MIN_RING_DESC,
> +		.nb_align = IDPF_ALIGN_RING_DESC,
> +	};
> +
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
> new file mode 100644
> index 0000000000..df0ed772c1
> --- /dev/null
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -0,0 +1,371 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <ethdev_driver.h>
> +#include <rte_net.h>
> +
> +#include "idpf_ethdev.h"
> +#include "idpf_rxtx.h"
> +
> +static inline int

There is no point in making it inline: this is control path code.
Please leave it to the compiler to do its job.

> +check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
> +		uint16_t tx_free_thresh)

[snip]

> +static inline void
> +reset_split_tx_descq(struct idpf_tx_queue *txq)

same here since it looks like control path as well.

[snip]

> +{
> +	struct idpf_tx_entry *txe;
> +	uint32_t i, size;
> +	uint16_t prev;
> +
> +	if (txq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
> +		return;
> +	}
> +
> +	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)txq->desc_ring)[i] = 0;

Please, add a comment which explains why volatile is
required here.

> +
> +	txe = txq->sw_ring;
> +	prev = (uint16_t)(txq->sw_nb_desc - 1);
> +	for (i = 0; i < txq->sw_nb_desc; i++) {
> +		txe[i].mbuf = NULL;
> +		txe[i].last_id = i;
> +		txe[prev].next_id = i;
> +		prev = i;
> +	}
> +
> +	txq->tx_tail = 0;
> +	txq->nb_used = 0;
> +
> +	/* Use this as next to clean for split desc queue */
> +	txq->last_desc_cleaned = 0;
> +	txq->sw_tail = 0;
> +	txq->nb_free = txq->nb_tx_desc - 1;
> +}
> +
> +static inline void
> +reset_split_tx_complq(struct idpf_tx_queue *cq)

same here

> +{
> +	uint32_t i, size;
> +
> +	if (cq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
> +		return;
> +	}
> +
> +	size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)cq->compl_ring)[i] = 0;

same here

> +
> +	cq->tx_tail = 0;
> +	cq->expected_gen_id = 1;
> +}
> +
> +static inline void
> +reset_single_tx_queue(struct idpf_tx_queue *txq)

same here

> +{
> +	struct idpf_tx_entry *txe;
> +	uint32_t i, size;
> +	uint16_t prev;
> +
> +	if (txq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
> +		return;
> +	}
> +
> +	txe = txq->sw_ring;
> +	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)txq->tx_ring)[i] = 0;

same here

> +
> +	prev = (uint16_t)(txq->nb_tx_desc - 1);
> +	for (i = 0; i < txq->nb_tx_desc; i++) {
> +		txq->tx_ring[i].qw1.cmd_dtype =
> +			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> +		txe[i].mbuf =  NULL;
> +		txe[i].last_id = i;
> +		txe[prev].next_id = i;
> +		prev = i;
> +	}
> +
> +	txq->tx_tail = 0;
> +	txq->nb_used = 0;
> +
> +	txq->last_desc_cleaned = txq->nb_tx_desc - 1;
> +	txq->nb_free = txq->nb_tx_desc - 1;
> +
> +	txq->next_dd = txq->rs_thresh - 1;
> +	txq->next_rs = txq->rs_thresh - 1;
> +}
> +
> +static int
> +idpf_tx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			  uint16_t nb_desc, unsigned int socket_id,
> +			  const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +	struct idpf_adapter *adapter = vport->adapter;
> +	uint16_t tx_rs_thresh, tx_free_thresh;
> +	struct idpf_hw *hw = &adapter->hw;
> +	struct idpf_tx_queue *txq, *cq;
> +	const struct rte_memzone *mz;
> +	uint32_t ring_size;
> +	uint64_t offloads;
> +
> +	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
> +
> +	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > IDPF_MAX_RING_DESC ||
> +	    nb_desc < IDPF_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
> +			     nb_desc);
> +		return -EINVAL;
> +	}

You don't need to check it here, since ethdev does it for you
before driver operation invocation.
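
For reference, the generic layer performs roughly the following validation (paraphrased from memory, not the exact rte_ethdev.c code) against the limits the driver reports via dev_infos_get, before the PMD op is ever called:

	/* Approximate logic of rte_eth_tx_queue_setup(); not verbatim. */
	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0)
		return -EINVAL;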

> +
> +	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?

compare vs 0 explicitly

> +		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
> +	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?

compare vs 0 explicitly

> +		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
> +	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
> +		return -EINVAL;
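
A sketch of the explicit comparison being asked for; only the conditions change, the behaviour stays the same:

	/* The (uint16_t) casts are dropped: both fields are already uint16_t. */
	tx_rs_thresh = (tx_conf->tx_rs_thresh != 0) ?
		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH;
	tx_free_thresh = (tx_conf->tx_free_thresh != 0) ?
		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH;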
> +
> +	/* Allocate the TX queue data structure. */
> +	txq = rte_zmalloc_socket("idpf split txq",
> +				 sizeof(struct idpf_tx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 socket_id);
> +	if (txq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
> +		return -ENOMEM;
> +	}
> +
> +	txq->nb_tx_desc = nb_desc;
> +	txq->rs_thresh = tx_rs_thresh;
> +	txq->free_thresh = tx_free_thresh;
> +	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
> +	txq->port_id = dev->data->port_id;
> +	txq->offloads = offloads;
> +	txq->tx_deferred_start = tx_conf->tx_deferred_start;

Deferred start is not supported at this point, so a request
with deferred start enabled should be rejected.
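
One possible way to reject it until deferred start is actually implemented (a sketch, not the submitted code; placed before any allocation so nothing needs to be freed):

	if (tx_conf->tx_deferred_start != 0) {
		PMD_INIT_LOG(ERR, "Tx deferred start is not supported");
		return -EINVAL;
	}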

> +
> +	/* Allocate software ring */
> +	txq->sw_nb_desc = 2 * nb_desc;
> +	txq->sw_ring =
> +		rte_zmalloc_socket("idpf split tx sw ring",
> +				   sizeof(struct idpf_tx_entry) *
> +				   txq->sw_nb_desc,
> +				   RTE_CACHE_LINE_SIZE,
> +				   socket_id);
> +	if (txq->sw_ring == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate TX hardware ring descriptors. */
> +	ring_size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "split_tx_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +	txq->tx_ring_phys_addr = mz->iova;
> +	txq->desc_ring = (struct idpf_flex_tx_sched_desc *)mz->addr;
> +
> +	txq->mz = mz;
> +	reset_split_tx_descq(txq);
> +	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> +			queue_idx * vport->chunks_info.tx_qtail_spacing);
> +
> +	/* Allocate the TX completion queue data structure. */
> +	txq->complq = rte_zmalloc_socket("idpf splitq cq",
> +					 sizeof(struct idpf_tx_queue),
> +					 RTE_CACHE_LINE_SIZE,
> +					 socket_id);
> +	cq = txq->complq;
> +	if (cq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");

Free the previously allocated resources, including txq->complq.
However, I'd recommend goto-style cleanup for error handling in
this case.
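
A sketch of the goto-style unwinding the reviewer has in mind; the labels, the local ret variable and the exact free order are illustrative, not the submitted code:

	if (cq == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx completion queue");
		ret = -ENOMEM;
		goto err_cq_alloc;
	}
	/* ... completion-queue ring reservation ... */
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
		ret = -ENOMEM;
		goto err_cq_mz;
	}
	/* ... */
	return 0;

err_cq_mz:
	rte_free(txq->complq);
err_cq_alloc:
	rte_memzone_free(txq->mz);	/* Tx descriptor ring memzone */
	rte_free(txq->sw_ring);
	rte_free(txq);
	return ret;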

> +		return -ENOMEM;
> +	}
> +	cq->nb_tx_desc = 2 * nb_desc;
> +	cq->queue_id = vport->chunks_info.tx_compl_start_qid + queue_idx;
> +	cq->port_id = dev->data->port_id;
> +	cq->txqs = dev->data->tx_queues;
> +	cq->tx_start_qid = vport->chunks_info.tx_start_qid;
> +
> +	ring_size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "tx_split_compl_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);

Free txq->complq and txq->mz

> +		return -ENOMEM;
> +	}
> +	cq->tx_ring_phys_addr = mz->iova;
> +	cq->compl_ring = (struct idpf_splitq_tx_compl_desc *)mz->addr;
> +	cq->mz = mz;
> +	reset_split_tx_complq(cq);
> +
> +	txq->q_set = true;
> +	dev->data->tx_queues[queue_idx] = txq;
> +
> +	return 0;
> +}
> +
> +static int
> +idpf_tx_single_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			   uint16_t nb_desc, unsigned int socket_id,
> +			   const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +	struct idpf_adapter *adapter = vport->adapter;
> +	uint16_t tx_rs_thresh, tx_free_thresh;
> +	struct idpf_hw *hw = &adapter->hw;
> +	const struct rte_memzone *mz;
> +	struct idpf_tx_queue *txq;
> +	uint32_t ring_size;
> +	uint64_t offloads;
> +
> +	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
> +
> +	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > IDPF_MAX_RING_DESC ||
> +	    nb_desc < IDPF_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
> +			     nb_desc);
> +		return -EINVAL;
> +	}

You don't need to check it here, since ethdev does it for you
before driver operation invocation.

> +
> +	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
> +		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
> +	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
> +		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
> +	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
> +		return -EINVAL;
> +
> +	/* Allocate the TX queue data structure. */
> +	txq = rte_zmalloc_socket("idpf txq",
> +				 sizeof(struct idpf_tx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 socket_id);
> +	if (txq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
> +		return -ENOMEM;
> +	}
> +
> +	/* TODO: vlan offload */
> +
> +	txq->nb_tx_desc = nb_desc;
> +	txq->rs_thresh = tx_rs_thresh;
> +	txq->free_thresh = tx_free_thresh;
> +	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
> +	txq->port_id = dev->data->port_id;
> +	txq->offloads = offloads;
> +	txq->tx_deferred_start = tx_conf->tx_deferred_start;

same here

> +
> +	/* Allocate software ring */
> +	txq->sw_ring =
> +		rte_zmalloc_socket("idpf tx sw ring",
> +				   sizeof(struct idpf_tx_entry) * nb_desc,
> +				   RTE_CACHE_LINE_SIZE,
> +				   socket_id);
> +	if (txq->sw_ring == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate TX hardware ring descriptors. */
> +	ring_size = sizeof(struct idpf_flex_tx_desc) * nb_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	txq->tx_ring_phys_addr = mz->iova;
> +	txq->tx_ring = (struct idpf_flex_tx_desc *)mz->addr;
> +
> +	txq->mz = mz;
> +	reset_single_tx_queue(txq);
> +	txq->q_set = true;
> +	dev->data->tx_queues[queue_idx] = txq;
> +	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> +			queue_idx * vport->chunks_info.tx_qtail_spacing);

I'm sorry, but it looks like too much code is duplicated.
I guess a shared function could avoid it.
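
One shape such a shared helper could take (a sketch only; the name and parameter list are hypothetical):

	/* Hypothetical helper shared by the single- and split-queue paths:
	 * validates thresholds, allocates the queue structure and SW ring,
	 * reserves the descriptor ring memzone and fills the common fields.
	 * Returns NULL on failure after freeing anything it allocated.
	 */
	static struct idpf_tx_queue *
	idpf_txq_alloc_common(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf,
			      size_t desc_size, const char *ring_name);

Both idpf_tx_single_queue_setup() and idpf_tx_split_queue_setup() would then only add their mode-specific pieces (completion queue setup vs. single-queue ring init).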

> +
> +	return 0;
> +}
> +
> +int
> +idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +		    uint16_t nb_desc, unsigned int socket_id,
> +		    const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +
> +	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
> +		return idpf_tx_single_queue_setup(dev, queue_idx, nb_desc,
> +						  socket_id, tx_conf);
> +	else
> +		return idpf_tx_split_queue_setup(dev, queue_idx, nb_desc,
> +						 socket_id, tx_conf);
> +}

[snip]
  
Xing, Beilei Oct. 26, 2022, 8:34 a.m. UTC | #2
> -----Original Message-----
> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Sent: Tuesday, October 25, 2022 5:40 PM
> To: Guo, Junfeng <junfeng.guo@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>
> Cc: dev@dpdk.org; Li, Xiaoyun <xiaoyun.li@intel.com>
> Subject: Re: [PATCH v11 03/18] net/idpf: add Tx queue setup
> 
> On 10/24/22 16:12, Junfeng Guo wrote:
> > Add support for tx_queue_setup ops.
> >
> > In the single queue model, the same descriptor queue is used by SW to
> > post buffer descriptors to HW and by HW to post completed descriptors
> > to SW.
> >
> > In the split queue model, "RX buffer queues" are used to pass
> > descriptor buffers from SW to HW while Rx queues are used only to pass
> > the descriptor completions, that is, descriptors that point to
> > completed buffers, from HW to SW. This is contrary to the single queue
> > model in which Rx queues are used for both purposes.
> >
> > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
> > Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
> 
> > +
> > +	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
> > +	for (i = 0; i < size; i++)
> > +		((volatile char *)txq->desc_ring)[i] = 0;
> 
> Please, add a comment which explains why volatile is required here.

Volatile is used here to make sure the memory writes go through when touching the descriptors.
It follows the Intel PMDs' coding style.
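
Something along these lines in the code itself would capture that; the wording is only a suggestion:

	/* Clear the ring through a volatile pointer so the compiler cannot
	 * optimize away or defer the stores; this descriptor memory is
	 * shared with the hardware.
	 */
	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
	for (i = 0; i < size; i++)
		((volatile char *)txq->desc_ring)[i] = 0;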

> 
> > +
> > +	txq->tx_ring_phys_addr = mz->iova;
> > +	txq->tx_ring = (struct idpf_flex_tx_desc *)mz->addr;
> > +
> > +	txq->mz = mz;
> > +	reset_single_tx_queue(txq);
> > +	txq->q_set = true;
> > +	dev->data->tx_queues[queue_idx] = txq;
> > +	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> > +			queue_idx * vport->chunks_info.tx_qtail_spacing);
> 
> I'm sorry, but it looks like too much code is duplicated.
> I guess it could be a shared function to avoid it.

It makes sense to optimize the queue setup, but can we improve it in the next release,
given the RC2 deadline?
Also, this part should become a common module shared by the idpf PMD and a new PMD;
it should live in the drivers/common/idpf folder in DPDK 23.03.

> 
> > +
> > +	return 0;
> > +}
> > +
> > +int
> > +idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> > +		    uint16_t nb_desc, unsigned int socket_id,
> > +		    const struct rte_eth_txconf *tx_conf) {
> > +	struct idpf_vport *vport = dev->data->dev_private;
> > +
> > +	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
> > +		return idpf_tx_single_queue_setup(dev, queue_idx, nb_desc,
> > +						  socket_id, tx_conf);
> > +	else
> > +		return idpf_tx_split_queue_setup(dev, queue_idx, nb_desc,
> > +						 socket_id, tx_conf);
> > +}
> 
> [snip]
  

Patch

diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index 75695085f8..c0307128be 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -10,6 +10,7 @@ 
 #include <rte_dev.h>
 
 #include "idpf_ethdev.h"
+#include "idpf_rxtx.h"
 
 #define IDPF_TX_SINGLE_Q	"tx_single"
 #define IDPF_RX_SINGLE_Q	"rx_single"
@@ -36,6 +37,7 @@  static void idpf_adapter_rel(struct idpf_adapter *adapter);
 static const struct eth_dev_ops idpf_eth_dev_ops = {
 	.dev_configure			= idpf_dev_configure,
 	.dev_close			= idpf_dev_close,
+	.tx_queue_setup			= idpf_tx_queue_setup,
 	.dev_infos_get			= idpf_dev_info_get,
 };
 
@@ -54,11 +56,23 @@  idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
 
 	dev_info->max_mac_addrs = IDPF_NUM_MACADDR_MAX;
-	dev_info->dev_capa = 0;
+	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
 	dev_info->rx_offload_capa = 0;
-
 	dev_info->tx_offload_capa = 0;
 
+	dev_info->default_txconf = (struct rte_eth_txconf) {
+		.tx_free_thresh = IDPF_DEFAULT_TX_FREE_THRESH,
+		.tx_rs_thresh = IDPF_DEFAULT_TX_RS_THRESH,
+		.offloads = 0,
+	};
+
+	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
+		.nb_max = IDPF_MAX_RING_DESC,
+		.nb_min = IDPF_MIN_RING_DESC,
+		.nb_align = IDPF_ALIGN_RING_DESC,
+	};
+
+
 	return 0;
 }
 
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
new file mode 100644
index 0000000000..df0ed772c1
--- /dev/null
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -0,0 +1,371 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <ethdev_driver.h>
+#include <rte_net.h>
+
+#include "idpf_ethdev.h"
+#include "idpf_rxtx.h"
+
+static inline int
+check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
+		uint16_t tx_free_thresh)
+{
+	/* TX descriptors will have their RS bit set after tx_rs_thresh
+	 * descriptors have been used. The TX descriptor ring will be cleaned
+	 * after tx_free_thresh descriptors are used or if the number of
+	 * descriptors required to transmit a packet is greater than the
+	 * number of free TX descriptors.
+	 *
+	 * The following constraints must be satisfied:
+	 *  - tx_rs_thresh must be less than the size of the ring minus 2.
+	 *  - tx_free_thresh must be less than the size of the ring minus 3.
+	 *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
+	 *  - tx_rs_thresh must be a divisor of the ring size.
+	 *
+	 * One descriptor in the TX ring is used as a sentinel to avoid a H/W
+	 * race condition, hence the maximum threshold constraints. When set
+	 * to zero use default values.
+	 */
+	if (tx_rs_thresh >= (nb_desc - 2)) {
+		PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be less than the "
+			     "number of TX descriptors (%u) minus 2",
+			     tx_rs_thresh, nb_desc);
+		return -EINVAL;
+	}
+	if (tx_free_thresh >= (nb_desc - 3)) {
+		PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be less than the "
+			     "number of TX descriptors (%u) minus 3.",
+			     tx_free_thresh, nb_desc);
+		return -EINVAL;
+	}
+	if (tx_rs_thresh > tx_free_thresh) {
+		PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be less than or "
+			     "equal to tx_free_thresh (%u).",
+			     tx_rs_thresh, tx_free_thresh);
+		return -EINVAL;
+	}
+	if ((nb_desc % tx_rs_thresh) != 0) {
+		PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be a divisor of the "
+			     "number of TX descriptors (%u).",
+			     tx_rs_thresh, nb_desc);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline void
+reset_split_tx_descq(struct idpf_tx_queue *txq)
+{
+	struct idpf_tx_entry *txe;
+	uint32_t i, size;
+	uint16_t prev;
+
+	if (txq == NULL) {
+		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
+		return;
+	}
+
+	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
+	for (i = 0; i < size; i++)
+		((volatile char *)txq->desc_ring)[i] = 0;
+
+	txe = txq->sw_ring;
+	prev = (uint16_t)(txq->sw_nb_desc - 1);
+	for (i = 0; i < txq->sw_nb_desc; i++) {
+		txe[i].mbuf = NULL;
+		txe[i].last_id = i;
+		txe[prev].next_id = i;
+		prev = i;
+	}
+
+	txq->tx_tail = 0;
+	txq->nb_used = 0;
+
+	/* Use this as next to clean for split desc queue */
+	txq->last_desc_cleaned = 0;
+	txq->sw_tail = 0;
+	txq->nb_free = txq->nb_tx_desc - 1;
+}
+
+static inline void
+reset_split_tx_complq(struct idpf_tx_queue *cq)
+{
+	uint32_t i, size;
+
+	if (cq == NULL) {
+		PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
+		return;
+	}
+
+	size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
+	for (i = 0; i < size; i++)
+		((volatile char *)cq->compl_ring)[i] = 0;
+
+	cq->tx_tail = 0;
+	cq->expected_gen_id = 1;
+}
+
+static inline void
+reset_single_tx_queue(struct idpf_tx_queue *txq)
+{
+	struct idpf_tx_entry *txe;
+	uint32_t i, size;
+	uint16_t prev;
+
+	if (txq == NULL) {
+		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
+		return;
+	}
+
+	txe = txq->sw_ring;
+	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
+	for (i = 0; i < size; i++)
+		((volatile char *)txq->tx_ring)[i] = 0;
+
+	prev = (uint16_t)(txq->nb_tx_desc - 1);
+	for (i = 0; i < txq->nb_tx_desc; i++) {
+		txq->tx_ring[i].qw1.cmd_dtype =
+			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
+		txe[i].mbuf =  NULL;
+		txe[i].last_id = i;
+		txe[prev].next_id = i;
+		prev = i;
+	}
+
+	txq->tx_tail = 0;
+	txq->nb_used = 0;
+
+	txq->last_desc_cleaned = txq->nb_tx_desc - 1;
+	txq->nb_free = txq->nb_tx_desc - 1;
+
+	txq->next_dd = txq->rs_thresh - 1;
+	txq->next_rs = txq->rs_thresh - 1;
+}
+
+static int
+idpf_tx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			  uint16_t nb_desc, unsigned int socket_id,
+			  const struct rte_eth_txconf *tx_conf)
+{
+	struct idpf_vport *vport = dev->data->dev_private;
+	struct idpf_adapter *adapter = vport->adapter;
+	uint16_t tx_rs_thresh, tx_free_thresh;
+	struct idpf_hw *hw = &adapter->hw;
+	struct idpf_tx_queue *txq, *cq;
+	const struct rte_memzone *mz;
+	uint32_t ring_size;
+	uint64_t offloads;
+
+	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
+
+	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
+	    nb_desc > IDPF_MAX_RING_DESC ||
+	    nb_desc < IDPF_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
+			     nb_desc);
+		return -EINVAL;
+	}
+
+	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
+		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
+	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
+		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
+	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
+		return -EINVAL;
+
+	/* Allocate the TX queue data structure. */
+	txq = rte_zmalloc_socket("idpf split txq",
+				 sizeof(struct idpf_tx_queue),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (txq == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		return -ENOMEM;
+	}
+
+	txq->nb_tx_desc = nb_desc;
+	txq->rs_thresh = tx_rs_thresh;
+	txq->free_thresh = tx_free_thresh;
+	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
+	txq->port_id = dev->data->port_id;
+	txq->offloads = offloads;
+	txq->tx_deferred_start = tx_conf->tx_deferred_start;
+
+	/* Allocate software ring */
+	txq->sw_nb_desc = 2 * nb_desc;
+	txq->sw_ring =
+		rte_zmalloc_socket("idpf split tx sw ring",
+				   sizeof(struct idpf_tx_entry) *
+				   txq->sw_nb_desc,
+				   RTE_CACHE_LINE_SIZE,
+				   socket_id);
+	if (txq->sw_ring == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	/* Allocate TX hardware ring descriptors. */
+	ring_size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
+	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
+	mz = rte_eth_dma_zone_reserve(dev, "split_tx_ring", queue_idx,
+				      ring_size, IDPF_RING_BASE_ALIGN,
+				      socket_id);
+	if (mz == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
+		rte_free(txq->sw_ring);
+		rte_free(txq);
+		return -ENOMEM;
+	}
+	txq->tx_ring_phys_addr = mz->iova;
+	txq->desc_ring = (struct idpf_flex_tx_sched_desc *)mz->addr;
+
+	txq->mz = mz;
+	reset_split_tx_descq(txq);
+	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
+			queue_idx * vport->chunks_info.tx_qtail_spacing);
+
+	/* Allocate the TX completion queue data structure. */
+	txq->complq = rte_zmalloc_socket("idpf splitq cq",
+					 sizeof(struct idpf_tx_queue),
+					 RTE_CACHE_LINE_SIZE,
+					 socket_id);
+	cq = txq->complq;
+	if (cq == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		return -ENOMEM;
+	}
+	cq->nb_tx_desc = 2 * nb_desc;
+	cq->queue_id = vport->chunks_info.tx_compl_start_qid + queue_idx;
+	cq->port_id = dev->data->port_id;
+	cq->txqs = dev->data->tx_queues;
+	cq->tx_start_qid = vport->chunks_info.tx_start_qid;
+
+	ring_size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
+	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
+	mz = rte_eth_dma_zone_reserve(dev, "tx_split_compl_ring", queue_idx,
+				      ring_size, IDPF_RING_BASE_ALIGN,
+				      socket_id);
+	if (mz == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
+		rte_free(txq->sw_ring);
+		rte_free(txq);
+		return -ENOMEM;
+	}
+	cq->tx_ring_phys_addr = mz->iova;
+	cq->compl_ring = (struct idpf_splitq_tx_compl_desc *)mz->addr;
+	cq->mz = mz;
+	reset_split_tx_complq(cq);
+
+	txq->q_set = true;
+	dev->data->tx_queues[queue_idx] = txq;
+
+	return 0;
+}
+
+static int
+idpf_tx_single_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			   uint16_t nb_desc, unsigned int socket_id,
+			   const struct rte_eth_txconf *tx_conf)
+{
+	struct idpf_vport *vport = dev->data->dev_private;
+	struct idpf_adapter *adapter = vport->adapter;
+	uint16_t tx_rs_thresh, tx_free_thresh;
+	struct idpf_hw *hw = &adapter->hw;
+	const struct rte_memzone *mz;
+	struct idpf_tx_queue *txq;
+	uint32_t ring_size;
+	uint64_t offloads;
+
+	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
+
+	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
+	    nb_desc > IDPF_MAX_RING_DESC ||
+	    nb_desc < IDPF_MIN_RING_DESC) {
+		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
+			     nb_desc);
+		return -EINVAL;
+	}
+
+	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
+		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
+	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
+		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
+	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
+		return -EINVAL;
+
+	/* Allocate the TX queue data structure. */
+	txq = rte_zmalloc_socket("idpf txq",
+				 sizeof(struct idpf_tx_queue),
+				 RTE_CACHE_LINE_SIZE,
+				 socket_id);
+	if (txq == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
+		return -ENOMEM;
+	}
+
+	/* TODO: vlan offload */
+
+	txq->nb_tx_desc = nb_desc;
+	txq->rs_thresh = tx_rs_thresh;
+	txq->free_thresh = tx_free_thresh;
+	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
+	txq->port_id = dev->data->port_id;
+	txq->offloads = offloads;
+	txq->tx_deferred_start = tx_conf->tx_deferred_start;
+
+	/* Allocate software ring */
+	txq->sw_ring =
+		rte_zmalloc_socket("idpf tx sw ring",
+				   sizeof(struct idpf_tx_entry) * nb_desc,
+				   RTE_CACHE_LINE_SIZE,
+				   socket_id);
+	if (txq->sw_ring == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	/* Allocate TX hardware ring descriptors. */
+	ring_size = sizeof(struct idpf_flex_tx_desc) * nb_desc;
+	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
+	mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
+				      ring_size, IDPF_RING_BASE_ALIGN,
+				      socket_id);
+	if (mz == NULL) {
+		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
+		rte_free(txq->sw_ring);
+		rte_free(txq);
+		return -ENOMEM;
+	}
+
+	txq->tx_ring_phys_addr = mz->iova;
+	txq->tx_ring = (struct idpf_flex_tx_desc *)mz->addr;
+
+	txq->mz = mz;
+	reset_single_tx_queue(txq);
+	txq->q_set = true;
+	dev->data->tx_queues[queue_idx] = txq;
+	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
+			queue_idx * vport->chunks_info.tx_qtail_spacing);
+
+	return 0;
+}
+
+int
+idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+		    uint16_t nb_desc, unsigned int socket_id,
+		    const struct rte_eth_txconf *tx_conf)
+{
+	struct idpf_vport *vport = dev->data->dev_private;
+
+	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+		return idpf_tx_single_queue_setup(dev, queue_idx, nb_desc,
+						  socket_id, tx_conf);
+	else
+		return idpf_tx_split_queue_setup(dev, queue_idx, nb_desc,
+						 socket_id, tx_conf);
+}
diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
new file mode 100644
index 0000000000..c5c43c3033
--- /dev/null
+++ b/drivers/net/idpf/idpf_rxtx.h
@@ -0,0 +1,76 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _IDPF_RXTX_H_
+#define _IDPF_RXTX_H_
+
+#include "idpf_ethdev.h"
+
+/* In QLEN must be whole number of 32 descriptors. */
+#define IDPF_ALIGN_RING_DESC	32
+#define IDPF_MIN_RING_DESC	32
+#define IDPF_MAX_RING_DESC	4096
+#define IDPF_DMA_MEM_ALIGN	4096
+/* Base address of the HW descriptor ring should be 128B aligned. */
+#define IDPF_RING_BASE_ALIGN	128
+
+#define IDPF_DEFAULT_RX_FREE_THRESH	32
+
+#define IDPF_DEFAULT_TX_RS_THRESH	32
+#define IDPF_DEFAULT_TX_FREE_THRESH	32
+
+struct idpf_tx_entry {
+	struct rte_mbuf *mbuf;
+	uint16_t next_id;
+	uint16_t last_id;
+};
+
+/* Structure associated with each TX queue. */
+struct idpf_tx_queue {
+	const struct rte_memzone *mz;		/* memzone for Tx ring */
+	volatile struct idpf_flex_tx_desc *tx_ring;	/* Tx ring virtual address */
+	volatile union {
+		struct idpf_flex_tx_sched_desc *desc_ring;
+		struct idpf_splitq_tx_compl_desc *compl_ring;
+	};
+	uint64_t tx_ring_phys_addr;		/* Tx ring DMA address */
+	struct idpf_tx_entry *sw_ring;		/* address array of SW ring */
+
+	uint16_t nb_tx_desc;		/* ring length */
+	uint16_t tx_tail;		/* current value of tail */
+	volatile uint8_t *qtx_tail;	/* register address of tail */
+	/* number of used desc since RS bit set */
+	uint16_t nb_used;
+	uint16_t nb_free;
+	uint16_t last_desc_cleaned;	/* last desc have been cleaned*/
+	uint16_t free_thresh;
+	uint16_t rs_thresh;
+
+	uint16_t port_id;
+	uint16_t queue_id;
+	uint64_t offloads;
+	uint16_t next_dd;	/* next to set RS, for VPMD */
+	uint16_t next_rs;	/* next to check DD,  for VPMD */
+
+	bool q_set;		/* if tx queue has been configured */
+	bool q_started;		/* if tx queue has been started */
+	bool tx_deferred_start;	/* don't start this queue in dev start */
+	const struct idpf_txq_ops *ops;
+#define IDPF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1       BIT(0)
+#define IDPF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2       BIT(1)
+
+	/* only valid for split queue mode */
+	uint16_t sw_nb_desc;
+	uint16_t sw_tail;
+	void **txqs;
+	uint32_t tx_start_qid;
+	uint8_t expected_gen_id;
+	struct idpf_tx_queue *complq;
+};
+
+int idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+			uint16_t nb_desc, unsigned int socket_id,
+			const struct rte_eth_txconf *tx_conf);
+
+#endif /* _IDPF_RXTX_H_ */
diff --git a/drivers/net/idpf/meson.build b/drivers/net/idpf/meson.build
index ecf73355c3..b632b76656 100644
--- a/drivers/net/idpf/meson.build
+++ b/drivers/net/idpf/meson.build
@@ -11,5 +11,6 @@  deps += ['common_idpf']
 
 sources = files(
     'idpf_ethdev.c',
+    'idpf_rxtx.c',
     'idpf_vchnl.c',
 )