[v6,12/19] net/ngbe: add Rx queue setup and release

Message ID 20210617110005.4132926-13-jiawenwu@trustnetic.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Headers
Series net: ngbe PMD |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Jiawen Wu June 17, 2021, 10:59 a.m. UTC
  Setup device Rx queue and release Rx queue.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ngbe/meson.build   |   1 +
 drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
 drivers/net/ngbe/ngbe_ethdev.h |  16 ++
 drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
 drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
 5 files changed, 457 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ngbe/ngbe_rxtx.c
 create mode 100644 drivers/net/ngbe/ngbe_rxtx.h
  

Comments

Andrew Rybchenko July 2, 2021, 4:35 p.m. UTC | #1
On 6/17/21 1:59 PM, Jiawen Wu wrote:
> Setup device Rx queue and release Rx queue.
> 
> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
> ---
>  drivers/net/ngbe/meson.build   |   1 +
>  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
>  drivers/net/ngbe/ngbe_ethdev.h |  16 ++
>  drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
>  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
>  5 files changed, 457 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/ngbe/ngbe_rxtx.c
>  create mode 100644 drivers/net/ngbe/ngbe_rxtx.h
> 
> diff --git a/drivers/net/ngbe/meson.build b/drivers/net/ngbe/meson.build
> index 81173fa7f0..9e75b82f1c 100644
> --- a/drivers/net/ngbe/meson.build
> +++ b/drivers/net/ngbe/meson.build
> @@ -12,6 +12,7 @@ objs = [base_objs]
>  
>  sources = files(
>  	'ngbe_ethdev.c',
> +	'ngbe_rxtx.c',
>  )
>  
>  includes += include_directories('base')
> diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
> index c952023e8b..e73606c5f3 100644
> --- a/drivers/net/ngbe/ngbe_ethdev.c
> +++ b/drivers/net/ngbe/ngbe_ethdev.c
> @@ -12,6 +12,7 @@
>  #include "ngbe_logs.h"
>  #include "base/ngbe.h"
>  #include "ngbe_ethdev.h"
> +#include "ngbe_rxtx.h"
>  
>  static int ngbe_dev_close(struct rte_eth_dev *dev);
>  
> @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[] = {
>  	{ .vendor_id = 0, /* sentinel */ },
>  };
>  
> +static const struct rte_eth_desc_lim rx_desc_lim = {
> +	.nb_max = NGBE_RING_DESC_MAX,
> +	.nb_min = NGBE_RING_DESC_MIN,
> +	.nb_align = NGBE_RXD_ALIGN,
> +};
> +
>  static const struct eth_dev_ops ngbe_eth_dev_ops;
>  
>  static inline void
> @@ -241,12 +248,19 @@ static int
>  ngbe_dev_configure(struct rte_eth_dev *dev)
>  {
>  	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
>  
>  	PMD_INIT_FUNC_TRACE();
>  
>  	/* set flag to update link status after init */
>  	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
>  
> +	/*
> +	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
> +	 * allocation Rx preconditions we will reset it.
> +	 */
> +	adapter->rx_bulk_alloc_allowed = true;
> +
>  	return 0;
>  }
>  
> @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)
>  static int
>  ngbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>  {
> -	RTE_SET_USED(dev);
> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> +
> +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
> +
> +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
> +		.rx_thresh = {
> +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
> +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
> +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
> +		},
> +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
> +		.rx_drop_en = 0,
> +		.offloads = 0,
> +	};
> +
> +	dev_info->rx_desc_lim = rx_desc_lim;
>  
>  	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
>  				ETH_LINK_SPEED_10M;
>  
> +	/* Driver-preferred Rx/Tx parameters */
> +	dev_info->default_rxportconf.nb_queues = 1;
> +	dev_info->default_rxportconf.ring_size = 256;
> +
>  	return 0;
>  }
>  
> @@ -570,6 +603,8 @@ static const struct eth_dev_ops ngbe_eth_dev_ops = {
>  	.dev_configure              = ngbe_dev_configure,
>  	.dev_infos_get              = ngbe_dev_info_get,
>  	.link_update                = ngbe_dev_link_update,
> +	.rx_queue_setup             = ngbe_dev_rx_queue_setup,
> +	.rx_queue_release           = ngbe_dev_rx_queue_release,
>  };
>  
>  RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd);
> diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
> index b67508a3de..6580d288c8 100644
> --- a/drivers/net/ngbe/ngbe_ethdev.h
> +++ b/drivers/net/ngbe/ngbe_ethdev.h
> @@ -30,6 +30,7 @@ struct ngbe_interrupt {
>  struct ngbe_adapter {
>  	struct ngbe_hw             hw;
>  	struct ngbe_interrupt      intr;
> +	bool rx_bulk_alloc_allowed;

Shouldn't it be aligned as well as above fields?

>  };
>  
>  static inline struct ngbe_adapter *
> @@ -58,6 +59,13 @@ ngbe_dev_intr(struct rte_eth_dev *dev)
>  	return intr;
>  }
>  
> +void ngbe_dev_rx_queue_release(void *rxq);
> +
> +int  ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +		uint16_t nb_rx_desc, unsigned int socket_id,
> +		const struct rte_eth_rxconf *rx_conf,
> +		struct rte_mempool *mb_pool);
> +
>  int
>  ngbe_dev_link_update_share(struct rte_eth_dev *dev,
>  		int wait_to_complete);
> @@ -66,4 +74,12 @@ ngbe_dev_link_update_share(struct rte_eth_dev *dev,
>  #define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
>  #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
>  
> +/*
> + *  Default values for Rx/Tx configuration
> + */
> +#define NGBE_DEFAULT_RX_FREE_THRESH  32
> +#define NGBE_DEFAULT_RX_PTHRESH      8
> +#define NGBE_DEFAULT_RX_HTHRESH      8
> +#define NGBE_DEFAULT_RX_WTHRESH      0
> +
>  #endif /* _NGBE_ETHDEV_H_ */
> diff --git a/drivers/net/ngbe/ngbe_rxtx.c b/drivers/net/ngbe/ngbe_rxtx.c
> new file mode 100644
> index 0000000000..df0b64dc01
> --- /dev/null
> +++ b/drivers/net/ngbe/ngbe_rxtx.c
> @@ -0,0 +1,308 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
> + * Copyright(c) 2010-2017 Intel Corporation
> + */
> +
> +#include <sys/queue.h>
> +
> +#include <stdint.h>
> +#include <rte_ethdev.h>
> +#include <ethdev_driver.h>
> +#include <rte_malloc.h>
> +
> +#include "ngbe_logs.h"
> +#include "base/ngbe.h"
> +#include "ngbe_ethdev.h"
> +#include "ngbe_rxtx.h"
> +
> +/**
> + * ngbe_free_sc_cluster - free the not-yet-completed scattered cluster
> + *
> + * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
> + * in the sw_sc_ring is not set to NULL but rather points to the next
> + * mbuf of this RSC aggregation (that has not been completed yet and still
> + * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
> + * will just free first "nb_segs" segments of the cluster explicitly by calling
> + * an rte_pktmbuf_free_seg().
> + *
> + * @m scattered cluster head
> + */
> +static void __rte_cold
> +ngbe_free_sc_cluster(struct rte_mbuf *m)
> +{
> +	uint16_t i, nb_segs = m->nb_segs;
> +	struct rte_mbuf *next_seg;
> +
> +	for (i = 0; i < nb_segs; i++) {
> +		next_seg = m->next;
> +		rte_pktmbuf_free_seg(m);
> +		m = next_seg;
> +	}
> +}
> +
> +static void __rte_cold
> +ngbe_rx_queue_release_mbufs(struct ngbe_rx_queue *rxq)
> +{
> +	unsigned int i;
> +
> +	if (rxq->sw_ring != NULL) {
> +		for (i = 0; i < rxq->nb_rx_desc; i++) {
> +			if (rxq->sw_ring[i].mbuf != NULL) {
> +				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
> +				rxq->sw_ring[i].mbuf = NULL;
> +			}
> +		}
> +		if (rxq->rx_nb_avail) {

Compare vs 0 explicitly. However, it looks like that the
check is not required at all. Body may be done unconditionally.

> +			for (i = 0; i < rxq->rx_nb_avail; ++i) {
> +				struct rte_mbuf *mb;
> +
> +				mb = rxq->rx_stage[rxq->rx_next_avail + i];
> +				rte_pktmbuf_free_seg(mb);
> +			}
> +			rxq->rx_nb_avail = 0;
> +		}
> +	}
> +
> +	if (rxq->sw_sc_ring != NULL)
> +		for (i = 0; i < rxq->nb_rx_desc; i++)
> +			if (rxq->sw_sc_ring[i].fbuf) {

Compare vs NULL

> +				ngbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
> +				rxq->sw_sc_ring[i].fbuf = NULL;
> +			}
> +}
> +
> +static void __rte_cold
> +ngbe_rx_queue_release(struct ngbe_rx_queue *rxq)
> +{
> +	if (rxq != NULL) {
> +		ngbe_rx_queue_release_mbufs(rxq);
> +		rte_free(rxq->sw_ring);
> +		rte_free(rxq->sw_sc_ring);
> +		rte_free(rxq);
> +	}
> +}
> +
> +void __rte_cold
> +ngbe_dev_rx_queue_release(void *rxq)
> +{
> +	ngbe_rx_queue_release(rxq);
> +}
> +
> +/*
> + * Check if Rx Burst Bulk Alloc function can be used.
> + * Return
> + *        0: the preconditions are satisfied and the bulk allocation function
> + *           can be used.
> + *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
> + *           function must be used.
> + */
> +static inline int __rte_cold
> +check_rx_burst_bulk_alloc_preconditions(struct ngbe_rx_queue *rxq)
> +{
> +	int ret = 0;
> +
> +	/*
> +	 * Make sure the following pre-conditions are satisfied:
> +	 *   rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST
> +	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
> +	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
> +	 * Scattered packets are not supported.  This should be checked
> +	 * outside of this function.
> +	 */
> +	if (!(rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST)) {

Isn't is simpler to read and understand:
rxq->rx_free_thresh < RTE_PMD_NGBE_RX_MAX_BURST

> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> +			     "rxq->rx_free_thresh=%d, "
> +			     "RTE_PMD_NGBE_RX_MAX_BURST=%d",

Do not split format string.

> +			     rxq->rx_free_thresh, RTE_PMD_NGBE_RX_MAX_BURST);
> +		ret = -EINVAL;
> +	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {

rxq->rx_free_thresh >= rxq->nb_rx_desc
is simpler to read

> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> +			     "rxq->rx_free_thresh=%d, "
> +			     "rxq->nb_rx_desc=%d",

Do not split format string.

> +			     rxq->rx_free_thresh, rxq->nb_rx_desc);
> +		ret = -EINVAL;
> +	} else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {

(rxq->nb_rx_desc % rxq->rx_free_thresh) != 0
is easier to read

> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> +			     "rxq->nb_rx_desc=%d, "
> +			     "rxq->rx_free_thresh=%d",
> +			     rxq->nb_rx_desc, rxq->rx_free_thresh);

Do not split format string

> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +/* Reset dynamic ngbe_rx_queue fields back to defaults */
> +static void __rte_cold
> +ngbe_reset_rx_queue(struct ngbe_adapter *adapter, struct ngbe_rx_queue *rxq)
> +{
> +	static const struct ngbe_rx_desc zeroed_desc = {
> +						{{0}, {0} }, {{0}, {0} } };
> +	unsigned int i;
> +	uint16_t len = rxq->nb_rx_desc;
> +
> +	/*
> +	 * By default, the Rx queue setup function allocates enough memory for
> +	 * NGBE_RING_DESC_MAX.  The Rx Burst bulk allocation function requires
> +	 * extra memory at the end of the descriptor ring to be zero'd out.
> +	 */
> +	if (adapter->rx_bulk_alloc_allowed)
> +		/* zero out extra memory */
> +		len += RTE_PMD_NGBE_RX_MAX_BURST;
> +
> +	/*
> +	 * Zero out HW ring memory. Zero out extra memory at the end of
> +	 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
> +	 * reads extra memory as zeros.
> +	 */
> +	for (i = 0; i < len; i++)
> +		rxq->rx_ring[i] = zeroed_desc;
> +
> +	/*
> +	 * initialize extra software ring entries. Space for these extra
> +	 * entries is always allocated
> +	 */
> +	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
> +	for (i = rxq->nb_rx_desc; i < len; ++i)
> +		rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
> +
> +	rxq->rx_nb_avail = 0;
> +	rxq->rx_next_avail = 0;
> +	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
> +	rxq->rx_tail = 0;
> +	rxq->nb_rx_hold = 0;
> +	rxq->pkt_first_seg = NULL;
> +	rxq->pkt_last_seg = NULL;
> +}
> +
> +int __rte_cold
> +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> +			 uint16_t queue_idx,
> +			 uint16_t nb_desc,
> +			 unsigned int socket_id,
> +			 const struct rte_eth_rxconf *rx_conf,
> +			 struct rte_mempool *mp)
> +{
> +	const struct rte_memzone *rz;
> +	struct ngbe_rx_queue *rxq;
> +	struct ngbe_hw     *hw;
> +	uint16_t len;
> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hw = ngbe_dev_hw(dev);
> +
> +	/*
> +	 * Validate number of receive descriptors.
> +	 * It must not exceed hardware maximum, and must be multiple
> +	 * of NGBE_ALIGN.
> +	 */
> +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
> +			nb_desc > NGBE_RING_DESC_MAX ||
> +			nb_desc < NGBE_RING_DESC_MIN) {
> +		return -EINVAL;
> +	}

rte_eth_rx_queue_setup cares about it

> +
> +	/* Free memory prior to re-allocation if needed... */
> +	if (dev->data->rx_queues[queue_idx] != NULL) {
> +		ngbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
> +		dev->data->rx_queues[queue_idx] = NULL;
> +	}
> +
> +	/* First allocate the Rx queue data structure */
> +	rxq = rte_zmalloc_socket("ethdev RX queue",
> +				 sizeof(struct ngbe_rx_queue),
> +				 RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq == NULL)
> +		return -ENOMEM;
> +	rxq->mb_pool = mp;
> +	rxq->nb_rx_desc = nb_desc;
> +	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> +	rxq->queue_id = queue_idx;
> +	rxq->reg_idx = queue_idx;
> +	rxq->port_id = dev->data->port_id;
> +	rxq->drop_en = rx_conf->rx_drop_en;
> +	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
> +
> +	/*
> +	 * Allocate Rx ring hardware descriptors. A memzone large enough to
> +	 * handle the maximum ring size is allocated in order to allow for
> +	 * resizing in later calls to the queue setup function.
> +	 */
> +	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
> +				      RX_RING_SZ, NGBE_ALIGN, socket_id);
> +	if (rz == NULL) {
> +		ngbe_rx_queue_release(rxq);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Zero init all the descriptors in the ring.
> +	 */
> +	memset(rz->addr, 0, RX_RING_SZ);
> +
> +	rxq->rdt_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXWP(rxq->reg_idx));
> +	rxq->rdh_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXRP(rxq->reg_idx));
> +
> +	rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
> +	rxq->rx_ring = (struct ngbe_rx_desc *)TMZ_VADDR(rz);
> +
> +	/*
> +	 * Certain constraints must be met in order to use the bulk buffer
> +	 * allocation Rx burst function. If any of Rx queues doesn't meet them
> +	 * the feature should be disabled for the whole port.
> +	 */
> +	if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
> +		PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
> +				    "preconditions - canceling the feature for "
> +				    "the whole port[%d]",

Do not split format string.

> +			     rxq->queue_id, rxq->port_id);
> +		adapter->rx_bulk_alloc_allowed = false;
> +	}
> +
> +	/*
> +	 * Allocate software ring. Allow for space at the end of the
> +	 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
> +	 * function does not access an invalid memory region.
> +	 */
> +	len = nb_desc;
> +	if (adapter->rx_bulk_alloc_allowed)
> +		len += RTE_PMD_NGBE_RX_MAX_BURST;
> +
> +	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
> +					  sizeof(struct ngbe_rx_entry) * len,
> +					  RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq->sw_ring == NULL) {
> +		ngbe_rx_queue_release(rxq);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Always allocate even if it's not going to be needed in order to
> +	 * simplify the code.
> +	 *
> +	 * This ring is used in Scattered Rx cases and Scattered Rx may
> +	 * be requested in ngbe_dev_rx_init(), which is called later from
> +	 * dev_start() flow.
> +	 */
> +	rxq->sw_sc_ring =
> +		rte_zmalloc_socket("rxq->sw_sc_ring",
> +				  sizeof(struct ngbe_scattered_rx_entry) * len,
> +				  RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq->sw_sc_ring == NULL) {
> +		ngbe_rx_queue_release(rxq);
> +		return -ENOMEM;
> +	}
> +
> +	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
> +			    "dma_addr=0x%" PRIx64,

Do not split format string.

> +		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
> +		     rxq->rx_ring_phys_addr);
> +
> +	dev->data->rx_queues[queue_idx] = rxq;
> +
> +	ngbe_reset_rx_queue(adapter, rxq);
> +
> +	return 0;
> +}
> +
> diff --git a/drivers/net/ngbe/ngbe_rxtx.h b/drivers/net/ngbe/ngbe_rxtx.h
> new file mode 100644
> index 0000000000..92b9a9fd1b
> --- /dev/null
> +++ b/drivers/net/ngbe/ngbe_rxtx.h
> @@ -0,0 +1,96 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
> + * Copyright(c) 2010-2017 Intel Corporation
> + */
> +
> +#ifndef _NGBE_RXTX_H_
> +#define _NGBE_RXTX_H_
> +
> +/*****************************************************************************
> + * Receive Descriptor
> + *****************************************************************************/
> +struct ngbe_rx_desc {
> +	struct {
> +		union {
> +			rte_le32_t dw0;
> +			struct {
> +				rte_le16_t pkt;
> +				rte_le16_t hdr;
> +			} lo;
> +		};
> +		union {
> +			rte_le32_t dw1;
> +			struct {
> +				rte_le16_t ipid;
> +				rte_le16_t csum;
> +			} hi;
> +		};
> +	} qw0; /* also as r.pkt_addr */
> +	struct {
> +		union {
> +			rte_le32_t dw2;
> +			struct {
> +				rte_le32_t status;
> +			} lo;
> +		};
> +		union {
> +			rte_le32_t dw3;
> +			struct {
> +				rte_le16_t len;
> +				rte_le16_t tag;
> +			} hi;
> +		};
> +	} qw1; /* also as r.hdr_addr */
> +};
> +
> +#define RTE_PMD_NGBE_RX_MAX_BURST 32
> +
> +#define RX_RING_SZ ((NGBE_RING_DESC_MAX + RTE_PMD_NGBE_RX_MAX_BURST) * \
> +		    sizeof(struct ngbe_rx_desc))
> +
> +
> +/**
> + * Structure associated with each descriptor of the Rx ring of a Rx queue.
> + */
> +struct ngbe_rx_entry {
> +	struct rte_mbuf *mbuf; /**< mbuf associated with Rx descriptor. */
> +};
> +
> +struct ngbe_scattered_rx_entry {
> +	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
> +};
> +
> +/**
> + * Structure associated with each Rx queue.
> + */
> +struct ngbe_rx_queue {
> +	struct rte_mempool  *mb_pool; /**< mbuf pool to populate Rx ring. */
> +	volatile struct ngbe_rx_desc *rx_ring; /**< Rx ring virtual address. */
> +	uint64_t            rx_ring_phys_addr; /**< Rx ring DMA address. */
> +	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
> +	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
> +	struct ngbe_rx_entry *sw_ring; /**< address of Rx software ring. */
> +	/**< address of scattered Rx software ring. */
> +	struct ngbe_scattered_rx_entry *sw_sc_ring;
> +	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
> +	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
> +	uint16_t            nb_rx_desc; /**< number of Rx descriptors. */
> +	uint16_t            rx_tail;  /**< current value of RDT register. */
> +	uint16_t            nb_rx_hold; /**< number of held free Rx desc. */
> +	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
> +	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
> +	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
> +	uint16_t            rx_free_thresh; /**< max free Rx desc to hold. */
> +	uint16_t            queue_id; /**< RX queue index. */
> +	uint16_t            reg_idx;  /**< RX queue register index. */
> +	/**< Packet type mask for different NICs. */
> +	uint16_t            port_id;  /**< Device port identifier. */
> +	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
> +	uint8_t             rx_deferred_start; /**< not in global dev start. */
> +	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
> +	struct rte_mbuf fake_mbuf;
> +	/** hold packets to return to application */
> +	struct rte_mbuf *rx_stage[RTE_PMD_NGBE_RX_MAX_BURST * 2];
> +};
> +
> +#endif /* _NGBE_RXTX_H_ */
>
  
Jiawen Wu July 5, 2021, 8:36 a.m. UTC | #2
On July 3, 2021 12:36 AM, Andrew Rybchenko wrote:
> On 6/17/21 1:59 PM, Jiawen Wu wrote:
> > Setup device Rx queue and release Rx queue.
> >
> > Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
> > ---
> >  drivers/net/ngbe/meson.build   |   1 +
> >  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
> > drivers/net/ngbe/ngbe_ethdev.h |  16 ++
> >  drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
> >  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
> >  5 files changed, 457 insertions(+), 1 deletion(-)  create mode 100644
> > drivers/net/ngbe/ngbe_rxtx.c  create mode 100644
> > drivers/net/ngbe/ngbe_rxtx.h
> >
> > diff --git a/drivers/net/ngbe/meson.build
> > b/drivers/net/ngbe/meson.build index 81173fa7f0..9e75b82f1c 100644
> > --- a/drivers/net/ngbe/meson.build
> > +++ b/drivers/net/ngbe/meson.build
> > @@ -12,6 +12,7 @@ objs = [base_objs]
> >
> >  sources = files(
> >  	'ngbe_ethdev.c',
> > +	'ngbe_rxtx.c',
> >  )
> >
> >  includes += include_directories('base') diff --git
> > a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
> > index c952023e8b..e73606c5f3 100644
> > --- a/drivers/net/ngbe/ngbe_ethdev.c
> > +++ b/drivers/net/ngbe/ngbe_ethdev.c
> > @@ -12,6 +12,7 @@
> >  #include "ngbe_logs.h"
> >  #include "base/ngbe.h"
> >  #include "ngbe_ethdev.h"
> > +#include "ngbe_rxtx.h"
> >
> >  static int ngbe_dev_close(struct rte_eth_dev *dev);
> >
> > @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[] = {
> >  	{ .vendor_id = 0, /* sentinel */ },
> >  };
> >
> > +static const struct rte_eth_desc_lim rx_desc_lim = {
> > +	.nb_max = NGBE_RING_DESC_MAX,
> > +	.nb_min = NGBE_RING_DESC_MIN,
> > +	.nb_align = NGBE_RXD_ALIGN,
> > +};
> > +
> >  static const struct eth_dev_ops ngbe_eth_dev_ops;
> >
> >  static inline void
> > @@ -241,12 +248,19 @@ static int
> >  ngbe_dev_configure(struct rte_eth_dev *dev)  {
> >  	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
> > +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
> >
> >  	PMD_INIT_FUNC_TRACE();
> >
> >  	/* set flag to update link status after init */
> >  	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
> >
> > +	/*
> > +	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
> > +	 * allocation Rx preconditions we will reset it.
> > +	 */
> > +	adapter->rx_bulk_alloc_allowed = true;
> > +
> >  	return 0;
> >  }
> >
> > @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)  static
> > int  ngbe_dev_info_get(struct rte_eth_dev *dev, struct
> > rte_eth_dev_info *dev_info)  {
> > -	RTE_SET_USED(dev);
> > +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> > +
> > +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
> > +
> > +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
> > +		.rx_thresh = {
> > +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
> > +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
> > +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
> > +		},
> > +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
> > +		.rx_drop_en = 0,
> > +		.offloads = 0,
> > +	};
> > +
> > +	dev_info->rx_desc_lim = rx_desc_lim;
> >
> >  	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
> >  				ETH_LINK_SPEED_10M;
> >
> > +	/* Driver-preferred Rx/Tx parameters */
> > +	dev_info->default_rxportconf.nb_queues = 1;
> > +	dev_info->default_rxportconf.ring_size = 256;
> > +
> >  	return 0;
> >  }
> >
> > @@ -570,6 +603,8 @@ static const struct eth_dev_ops ngbe_eth_dev_ops =
> {
> >  	.dev_configure              = ngbe_dev_configure,
> >  	.dev_infos_get              = ngbe_dev_info_get,
> >  	.link_update                = ngbe_dev_link_update,
> > +	.rx_queue_setup             = ngbe_dev_rx_queue_setup,
> > +	.rx_queue_release           = ngbe_dev_rx_queue_release,
> >  };
> >
> >  RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd); diff --git
> > a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
> > index b67508a3de..6580d288c8 100644
> > --- a/drivers/net/ngbe/ngbe_ethdev.h
> > +++ b/drivers/net/ngbe/ngbe_ethdev.h
> > @@ -30,6 +30,7 @@ struct ngbe_interrupt {  struct ngbe_adapter {
> >  	struct ngbe_hw             hw;
> >  	struct ngbe_interrupt      intr;
> > +	bool rx_bulk_alloc_allowed;
> 
> Shouldn't it be aligned as well as above fields?
> 
> >  };
> >
> >  static inline struct ngbe_adapter *
> > @@ -58,6 +59,13 @@ ngbe_dev_intr(struct rte_eth_dev *dev)
> >  	return intr;
> >  }
> >
> > +void ngbe_dev_rx_queue_release(void *rxq);
> > +
> > +int  ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> > +		uint16_t nb_rx_desc, unsigned int socket_id,
> > +		const struct rte_eth_rxconf *rx_conf,
> > +		struct rte_mempool *mb_pool);
> > +
> >  int
> >  ngbe_dev_link_update_share(struct rte_eth_dev *dev,
> >  		int wait_to_complete);
> > @@ -66,4 +74,12 @@ ngbe_dev_link_update_share(struct rte_eth_dev *dev,
> >  #define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
> >  #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
> >
> > +/*
> > + *  Default values for Rx/Tx configuration  */ #define
> > +NGBE_DEFAULT_RX_FREE_THRESH  32
> > +#define NGBE_DEFAULT_RX_PTHRESH      8
> > +#define NGBE_DEFAULT_RX_HTHRESH      8
> > +#define NGBE_DEFAULT_RX_WTHRESH      0
> > +
> >  #endif /* _NGBE_ETHDEV_H_ */
> > diff --git a/drivers/net/ngbe/ngbe_rxtx.c
> > b/drivers/net/ngbe/ngbe_rxtx.c new file mode 100644 index
> > 0000000000..df0b64dc01
> > --- /dev/null
> > +++ b/drivers/net/ngbe/ngbe_rxtx.c
> > @@ -0,0 +1,308 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
> > + * Copyright(c) 2010-2017 Intel Corporation  */
> > +
> > +#include <sys/queue.h>
> > +
> > +#include <stdint.h>
> > +#include <rte_ethdev.h>
> > +#include <ethdev_driver.h>
> > +#include <rte_malloc.h>
> > +
> > +#include "ngbe_logs.h"
> > +#include "base/ngbe.h"
> > +#include "ngbe_ethdev.h"
> > +#include "ngbe_rxtx.h"
> > +
> > +/**
> > + * ngbe_free_sc_cluster - free the not-yet-completed scattered
> > +cluster
> > + *
> > + * The "next" pointer of the last segment of (not-yet-completed) RSC
> > +clusters
> > + * in the sw_sc_ring is not set to NULL but rather points to the next
> > + * mbuf of this RSC aggregation (that has not been completed yet and
> > +still
> > + * resides on the HW ring). So, instead of calling for
> > +rte_pktmbuf_free() we
> > + * will just free first "nb_segs" segments of the cluster explicitly
> > +by calling
> > + * an rte_pktmbuf_free_seg().
> > + *
> > + * @m scattered cluster head
> > + */
> > +static void __rte_cold
> > +ngbe_free_sc_cluster(struct rte_mbuf *m) {
> > +	uint16_t i, nb_segs = m->nb_segs;
> > +	struct rte_mbuf *next_seg;
> > +
> > +	for (i = 0; i < nb_segs; i++) {
> > +		next_seg = m->next;
> > +		rte_pktmbuf_free_seg(m);
> > +		m = next_seg;
> > +	}
> > +}
> > +
> > +static void __rte_cold
> > +ngbe_rx_queue_release_mbufs(struct ngbe_rx_queue *rxq) {
> > +	unsigned int i;
> > +
> > +	if (rxq->sw_ring != NULL) {
> > +		for (i = 0; i < rxq->nb_rx_desc; i++) {
> > +			if (rxq->sw_ring[i].mbuf != NULL) {
> > +				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
> > +				rxq->sw_ring[i].mbuf = NULL;
> > +			}
> > +		}
> > +		if (rxq->rx_nb_avail) {
> 
> Compare vs 0 explicitly. However, it looks like that the check is not required at
> all. Body may be done unconditionally.
> 
> > +			for (i = 0; i < rxq->rx_nb_avail; ++i) {
> > +				struct rte_mbuf *mb;
> > +
> > +				mb = rxq->rx_stage[rxq->rx_next_avail + i];
> > +				rte_pktmbuf_free_seg(mb);
> > +			}
> > +			rxq->rx_nb_avail = 0;
> > +		}
> > +	}
> > +
> > +	if (rxq->sw_sc_ring != NULL)
> > +		for (i = 0; i < rxq->nb_rx_desc; i++)
> > +			if (rxq->sw_sc_ring[i].fbuf) {
> 
> Compare vs NULL
> 
> > +				ngbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
> > +				rxq->sw_sc_ring[i].fbuf = NULL;
> > +			}
> > +}
> > +
> > +static void __rte_cold
> > +ngbe_rx_queue_release(struct ngbe_rx_queue *rxq) {
> > +	if (rxq != NULL) {
> > +		ngbe_rx_queue_release_mbufs(rxq);
> > +		rte_free(rxq->sw_ring);
> > +		rte_free(rxq->sw_sc_ring);
> > +		rte_free(rxq);
> > +	}
> > +}
> > +
> > +void __rte_cold
> > +ngbe_dev_rx_queue_release(void *rxq)
> > +{
> > +	ngbe_rx_queue_release(rxq);
> > +}
> > +
> > +/*
> > + * Check if Rx Burst Bulk Alloc function can be used.
> > + * Return
> > + *        0: the preconditions are satisfied and the bulk allocation function
> > + *           can be used.
> > + *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
> > + *           function must be used.
> > + */
> > +static inline int __rte_cold
> > +check_rx_burst_bulk_alloc_preconditions(struct ngbe_rx_queue *rxq) {
> > +	int ret = 0;
> > +
> > +	/*
> > +	 * Make sure the following pre-conditions are satisfied:
> > +	 *   rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST
> > +	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
> > +	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
> > +	 * Scattered packets are not supported.  This should be checked
> > +	 * outside of this function.
> > +	 */
> > +	if (!(rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST)) {
> 
> Isn't is simpler to read and understand:
> rxq->rx_free_thresh < RTE_PMD_NGBE_RX_MAX_BURST
> 
> > +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> > +			     "rxq->rx_free_thresh=%d, "
> > +			     "RTE_PMD_NGBE_RX_MAX_BURST=%d",
> 
> Do not split format string.
> 
> > +			     rxq->rx_free_thresh, RTE_PMD_NGBE_RX_MAX_BURST);
> > +		ret = -EINVAL;
> > +	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
> 
> rxq->rx_free_thresh >= rxq->nb_rx_desc
> is simpler to read
> 
> > +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> > +			     "rxq->rx_free_thresh=%d, "
> > +			     "rxq->nb_rx_desc=%d",
> 
> Do not split format string.
> 
> > +			     rxq->rx_free_thresh, rxq->nb_rx_desc);
> > +		ret = -EINVAL;
> > +	} else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
> 
> (rxq->nb_rx_desc % rxq->rx_free_thresh) != 0 is easier to read
> 
> > +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
> > +			     "rxq->nb_rx_desc=%d, "
> > +			     "rxq->rx_free_thresh=%d",
> > +			     rxq->nb_rx_desc, rxq->rx_free_thresh);
> 
> Do not split format string
> 
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +/* Reset dynamic ngbe_rx_queue fields back to defaults */ static void
> > +__rte_cold ngbe_reset_rx_queue(struct ngbe_adapter *adapter, struct
> > +ngbe_rx_queue *rxq) {
> > +	static const struct ngbe_rx_desc zeroed_desc = {
> > +						{{0}, {0} }, {{0}, {0} } };
> > +	unsigned int i;
> > +	uint16_t len = rxq->nb_rx_desc;
> > +
> > +	/*
> > +	 * By default, the Rx queue setup function allocates enough memory for
> > +	 * NGBE_RING_DESC_MAX.  The Rx Burst bulk allocation function requires
> > +	 * extra memory at the end of the descriptor ring to be zero'd out.
> > +	 */
> > +	if (adapter->rx_bulk_alloc_allowed)
> > +		/* zero out extra memory */
> > +		len += RTE_PMD_NGBE_RX_MAX_BURST;
> > +
> > +	/*
> > +	 * Zero out HW ring memory. Zero out extra memory at the end of
> > +	 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
> > +	 * reads extra memory as zeros.
> > +	 */
> > +	for (i = 0; i < len; i++)
> > +		rxq->rx_ring[i] = zeroed_desc;
> > +
> > +	/*
> > +	 * initialize extra software ring entries. Space for these extra
> > +	 * entries is always allocated
> > +	 */
> > +	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
> > +	for (i = rxq->nb_rx_desc; i < len; ++i)
> > +		rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
> > +
> > +	rxq->rx_nb_avail = 0;
> > +	rxq->rx_next_avail = 0;
> > +	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
> > +	rxq->rx_tail = 0;
> > +	rxq->nb_rx_hold = 0;
> > +	rxq->pkt_first_seg = NULL;
> > +	rxq->pkt_last_seg = NULL;
> > +}
> > +
> > +int __rte_cold
> > +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> > +			 uint16_t queue_idx,
> > +			 uint16_t nb_desc,
> > +			 unsigned int socket_id,
> > +			 const struct rte_eth_rxconf *rx_conf,
> > +			 struct rte_mempool *mp)
> > +{
> > +	const struct rte_memzone *rz;
> > +	struct ngbe_rx_queue *rxq;
> > +	struct ngbe_hw     *hw;
> > +	uint16_t len;
> > +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
> > +
> > +	PMD_INIT_FUNC_TRACE();
> > +	hw = ngbe_dev_hw(dev);
> > +
> > +	/*
> > +	 * Validate number of receive descriptors.
> > +	 * It must not exceed hardware maximum, and must be multiple
> > +	 * of NGBE_ALIGN.
> > +	 */
> > +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
> > +			nb_desc > NGBE_RING_DESC_MAX ||
> > +			nb_desc < NGBE_RING_DESC_MIN) {
> > +		return -EINVAL;
> > +	}
> 
> rte_eth_rx_queue_setup cares about it
> 

I don't quite understand.

> > +
> > +	/* Free memory prior to re-allocation if needed... */
> > +	if (dev->data->rx_queues[queue_idx] != NULL) {
> > +		ngbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
> > +		dev->data->rx_queues[queue_idx] = NULL;
> > +	}
> > +
> > +	/* First allocate the Rx queue data structure */
> > +	rxq = rte_zmalloc_socket("ethdev RX queue",
> > +				 sizeof(struct ngbe_rx_queue),
> > +				 RTE_CACHE_LINE_SIZE, socket_id);
> > +	if (rxq == NULL)
> > +		return -ENOMEM;
> > +	rxq->mb_pool = mp;
> > +	rxq->nb_rx_desc = nb_desc;
> > +	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> > +	rxq->queue_id = queue_idx;
> > +	rxq->reg_idx = queue_idx;
> > +	rxq->port_id = dev->data->port_id;
> > +	rxq->drop_en = rx_conf->rx_drop_en;
> > +	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
> > +
> > +	/*
> > +	 * Allocate Rx ring hardware descriptors. A memzone large enough to
> > +	 * handle the maximum ring size is allocated in order to allow for
> > +	 * resizing in later calls to the queue setup function.
> > +	 */
> > +	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
> > +				      RX_RING_SZ, NGBE_ALIGN, socket_id);
> > +	if (rz == NULL) {
> > +		ngbe_rx_queue_release(rxq);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/*
> > +	 * Zero init all the descriptors in the ring.
> > +	 */
> > +	memset(rz->addr, 0, RX_RING_SZ);
> > +
> > +	rxq->rdt_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXWP(rxq->reg_idx));
> > +	rxq->rdh_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXRP(rxq->reg_idx));
> > +
> > +	rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
> > +	rxq->rx_ring = (struct ngbe_rx_desc *)TMZ_VADDR(rz);
> > +
> > +	/*
> > +	 * Certain constraints must be met in order to use the bulk buffer
> > +	 * allocation Rx burst function. If any of Rx queues doesn't meet them
> > +	 * the feature should be disabled for the whole port.
> > +	 */
> > +	if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
> > +		PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
> > +				    "preconditions - canceling the feature for "
> > +				    "the whole port[%d]",
> 
> Do not split format string.
> 
> > +			     rxq->queue_id, rxq->port_id);
> > +		adapter->rx_bulk_alloc_allowed = false;
> > +	}
> > +
> > +	/*
> > +	 * Allocate software ring. Allow for space at the end of the
> > +	 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
> > +	 * function does not access an invalid memory region.
> > +	 */
> > +	len = nb_desc;
> > +	if (adapter->rx_bulk_alloc_allowed)
> > +		len += RTE_PMD_NGBE_RX_MAX_BURST;
> > +
> > +	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
> > +					  sizeof(struct ngbe_rx_entry) * len,
> > +					  RTE_CACHE_LINE_SIZE, socket_id);
> > +	if (rxq->sw_ring == NULL) {
> > +		ngbe_rx_queue_release(rxq);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/*
> > +	 * Always allocate even if it's not going to be needed in order to
> > +	 * simplify the code.
> > +	 *
> > +	 * This ring is used in Scattered Rx cases and Scattered Rx may
> > +	 * be requested in ngbe_dev_rx_init(), which is called later from
> > +	 * dev_start() flow.
> > +	 */
> > +	rxq->sw_sc_ring =
> > +		rte_zmalloc_socket("rxq->sw_sc_ring",
> > +				  sizeof(struct ngbe_scattered_rx_entry) * len,
> > +				  RTE_CACHE_LINE_SIZE, socket_id);
> > +	if (rxq->sw_sc_ring == NULL) {
> > +		ngbe_rx_queue_release(rxq);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
> > +			    "dma_addr=0x%" PRIx64,
> 
> Do not split format string.
> 
> > +		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
> > +		     rxq->rx_ring_phys_addr);
> > +
> > +	dev->data->rx_queues[queue_idx] = rxq;
> > +
> > +	ngbe_reset_rx_queue(adapter, rxq);
> > +
> > +	return 0;
> > +}
> > +
  
Andrew Rybchenko July 5, 2021, 9:08 a.m. UTC | #3
On 7/5/21 11:36 AM, Jiawen Wu wrote:
> On July 3, 2021 12:36 AM, Andrew Rybchenko wrote:
>> On 6/17/21 1:59 PM, Jiawen Wu wrote:
>>> Setup device Rx queue and release Rx queue.
>>>
>>> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
>>> ---
>>>  drivers/net/ngbe/meson.build   |   1 +
>>>  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
>>> drivers/net/ngbe/ngbe_ethdev.h |  16 ++
>>>  drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
>>>  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
>>>  5 files changed, 457 insertions(+), 1 deletion(-)  create mode 100644
>>> drivers/net/ngbe/ngbe_rxtx.c  create mode 100644
>>> drivers/net/ngbe/ngbe_rxtx.h
>>>
>>> diff --git a/drivers/net/ngbe/meson.build
>>> b/drivers/net/ngbe/meson.build index 81173fa7f0..9e75b82f1c 100644
>>> --- a/drivers/net/ngbe/meson.build
>>> +++ b/drivers/net/ngbe/meson.build
>>> @@ -12,6 +12,7 @@ objs = [base_objs]
>>>
>>>  sources = files(
>>>  	'ngbe_ethdev.c',
>>> +	'ngbe_rxtx.c',
>>>  )
>>>
>>>  includes += include_directories('base') diff --git
>>> a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
>>> index c952023e8b..e73606c5f3 100644
>>> --- a/drivers/net/ngbe/ngbe_ethdev.c
>>> +++ b/drivers/net/ngbe/ngbe_ethdev.c
>>> @@ -12,6 +12,7 @@
>>>  #include "ngbe_logs.h"
>>>  #include "base/ngbe.h"
>>>  #include "ngbe_ethdev.h"
>>> +#include "ngbe_rxtx.h"
>>>
>>>  static int ngbe_dev_close(struct rte_eth_dev *dev);
>>>
>>> @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[] = {
>>>  	{ .vendor_id = 0, /* sentinel */ },
>>>  };
>>>
>>> +static const struct rte_eth_desc_lim rx_desc_lim = {
>>> +	.nb_max = NGBE_RING_DESC_MAX,
>>> +	.nb_min = NGBE_RING_DESC_MIN,
>>> +	.nb_align = NGBE_RXD_ALIGN,
>>> +};
>>> +
>>>  static const struct eth_dev_ops ngbe_eth_dev_ops;
>>>
>>>  static inline void
>>> @@ -241,12 +248,19 @@ static int
>>>  ngbe_dev_configure(struct rte_eth_dev *dev)  {
>>>  	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
>>> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
>>>
>>>  	PMD_INIT_FUNC_TRACE();
>>>
>>>  	/* set flag to update link status after init */
>>>  	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
>>>
>>> +	/*
>>> +	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
>>> +	 * allocation Rx preconditions we will reset it.
>>> +	 */
>>> +	adapter->rx_bulk_alloc_allowed = true;
>>> +
>>>  	return 0;
>>>  }
>>>
>>> @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)  static
>>> int  ngbe_dev_info_get(struct rte_eth_dev *dev, struct
>>> rte_eth_dev_info *dev_info)  {
>>> -	RTE_SET_USED(dev);
>>> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
>>> +
>>> +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
>>> +
>>> +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
>>> +		.rx_thresh = {
>>> +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
>>> +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
>>> +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
>>> +		},
>>> +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
>>> +		.rx_drop_en = 0,
>>> +		.offloads = 0,
>>> +	};
>>> +
>>> +	dev_info->rx_desc_lim = rx_desc_lim;
>>>
>>>  	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
>>>  				ETH_LINK_SPEED_10M;
>>>
>>> +	/* Driver-preferred Rx/Tx parameters */
>>> +	dev_info->default_rxportconf.nb_queues = 1;
>>> +	dev_info->default_rxportconf.ring_size = 256;
>>> +
>>>  	return 0;
>>>  }
>>>
>>> @@ -570,6 +603,8 @@ static const struct eth_dev_ops ngbe_eth_dev_ops =
>> {
>>>  	.dev_configure              = ngbe_dev_configure,
>>>  	.dev_infos_get              = ngbe_dev_info_get,
>>>  	.link_update                = ngbe_dev_link_update,
>>> +	.rx_queue_setup             = ngbe_dev_rx_queue_setup,
>>> +	.rx_queue_release           = ngbe_dev_rx_queue_release,
>>>  };
>>>
>>>  RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd); diff --git
>>> a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
>>> index b67508a3de..6580d288c8 100644
>>> --- a/drivers/net/ngbe/ngbe_ethdev.h
>>> +++ b/drivers/net/ngbe/ngbe_ethdev.h
>>> @@ -30,6 +30,7 @@ struct ngbe_interrupt {  struct ngbe_adapter {
>>>  	struct ngbe_hw             hw;
>>>  	struct ngbe_interrupt      intr;
>>> +	bool rx_bulk_alloc_allowed;
>>
>> Shouldn't it be aligned as well as above fields?
>>
>>>  };
>>>
>>>  static inline struct ngbe_adapter *
>>> @@ -58,6 +59,13 @@ ngbe_dev_intr(struct rte_eth_dev *dev)
>>>  	return intr;
>>>  }
>>>
>>> +void ngbe_dev_rx_queue_release(void *rxq);
>>> +
>>> +int  ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>>> +		uint16_t nb_rx_desc, unsigned int socket_id,
>>> +		const struct rte_eth_rxconf *rx_conf,
>>> +		struct rte_mempool *mb_pool);
>>> +
>>>  int
>>>  ngbe_dev_link_update_share(struct rte_eth_dev *dev,
>>>  		int wait_to_complete);
>>> @@ -66,4 +74,12 @@ ngbe_dev_link_update_share(struct rte_eth_dev *dev,
>>>  #define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
>>>  #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
>>>
>>> +/*
>>> + *  Default values for Rx/Tx configuration  */ #define
>>> +NGBE_DEFAULT_RX_FREE_THRESH  32
>>> +#define NGBE_DEFAULT_RX_PTHRESH      8
>>> +#define NGBE_DEFAULT_RX_HTHRESH      8
>>> +#define NGBE_DEFAULT_RX_WTHRESH      0
>>> +
>>>  #endif /* _NGBE_ETHDEV_H_ */
>>> diff --git a/drivers/net/ngbe/ngbe_rxtx.c
>>> b/drivers/net/ngbe/ngbe_rxtx.c new file mode 100644 index
>>> 0000000000..df0b64dc01
>>> --- /dev/null
>>> +++ b/drivers/net/ngbe/ngbe_rxtx.c
>>> @@ -0,0 +1,308 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
>>> + * Copyright(c) 2010-2017 Intel Corporation  */
>>> +
>>> +#include <sys/queue.h>
>>> +
>>> +#include <stdint.h>
>>> +#include <rte_ethdev.h>
>>> +#include <ethdev_driver.h>
>>> +#include <rte_malloc.h>
>>> +
>>> +#include "ngbe_logs.h"
>>> +#include "base/ngbe.h"
>>> +#include "ngbe_ethdev.h"
>>> +#include "ngbe_rxtx.h"
>>> +
>>> +/**
>>> + * ngbe_free_sc_cluster - free the not-yet-completed scattered
>>> +cluster
>>> + *
>>> + * The "next" pointer of the last segment of (not-yet-completed) RSC
>>> +clusters
>>> + * in the sw_sc_ring is not set to NULL but rather points to the next
>>> + * mbuf of this RSC aggregation (that has not been completed yet and
>>> +still
>>> + * resides on the HW ring). So, instead of calling for
>>> +rte_pktmbuf_free() we
>>> + * will just free first "nb_segs" segments of the cluster explicitly
>>> +by calling
>>> + * an rte_pktmbuf_free_seg().
>>> + *
>>> + * @m scattered cluster head
>>> + */
>>> +static void __rte_cold
>>> +ngbe_free_sc_cluster(struct rte_mbuf *m) {
>>> +	uint16_t i, nb_segs = m->nb_segs;
>>> +	struct rte_mbuf *next_seg;
>>> +
>>> +	for (i = 0; i < nb_segs; i++) {
>>> +		next_seg = m->next;
>>> +		rte_pktmbuf_free_seg(m);
>>> +		m = next_seg;
>>> +	}
>>> +}
>>> +
>>> +static void __rte_cold
>>> +ngbe_rx_queue_release_mbufs(struct ngbe_rx_queue *rxq) {
>>> +	unsigned int i;
>>> +
>>> +	if (rxq->sw_ring != NULL) {
>>> +		for (i = 0; i < rxq->nb_rx_desc; i++) {
>>> +			if (rxq->sw_ring[i].mbuf != NULL) {
>>> +				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
>>> +				rxq->sw_ring[i].mbuf = NULL;
>>> +			}
>>> +		}
>>> +		if (rxq->rx_nb_avail) {
>>
>> Compare vs 0 explicitly. However, it looks like that the check is not required at
>> all. Body may be done unconditionally.
>>
>>> +			for (i = 0; i < rxq->rx_nb_avail; ++i) {
>>> +				struct rte_mbuf *mb;
>>> +
>>> +				mb = rxq->rx_stage[rxq->rx_next_avail + i];
>>> +				rte_pktmbuf_free_seg(mb);
>>> +			}
>>> +			rxq->rx_nb_avail = 0;
>>> +		}
>>> +	}
>>> +
>>> +	if (rxq->sw_sc_ring != NULL)
>>> +		for (i = 0; i < rxq->nb_rx_desc; i++)
>>> +			if (rxq->sw_sc_ring[i].fbuf) {
>>
>> Compare vs NULL
>>
>>> +				ngbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
>>> +				rxq->sw_sc_ring[i].fbuf = NULL;
>>> +			}
>>> +}
>>> +
>>> +static void __rte_cold
>>> +ngbe_rx_queue_release(struct ngbe_rx_queue *rxq) {
>>> +	if (rxq != NULL) {
>>> +		ngbe_rx_queue_release_mbufs(rxq);
>>> +		rte_free(rxq->sw_ring);
>>> +		rte_free(rxq->sw_sc_ring);
>>> +		rte_free(rxq);
>>> +	}
>>> +}
>>> +
>>> +void __rte_cold
>>> +ngbe_dev_rx_queue_release(void *rxq)
>>> +{
>>> +	ngbe_rx_queue_release(rxq);
>>> +}
>>> +
>>> +/*
>>> + * Check if Rx Burst Bulk Alloc function can be used.
>>> + * Return
>>> + *        0: the preconditions are satisfied and the bulk allocation function
>>> + *           can be used.
>>> + *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
>>> + *           function must be used.
>>> + */
>>> +static inline int __rte_cold
>>> +check_rx_burst_bulk_alloc_preconditions(struct ngbe_rx_queue *rxq) {
>>> +	int ret = 0;
>>> +
>>> +	/*
>>> +	 * Make sure the following pre-conditions are satisfied:
>>> +	 *   rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST
>>> +	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
>>> +	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
>>> +	 * Scattered packets are not supported.  This should be checked
>>> +	 * outside of this function.
>>> +	 */
>>> +	if (!(rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST)) {
>>
>> Isn't is simpler to read and understand:
>> rxq->rx_free_thresh < RTE_PMD_NGBE_RX_MAX_BURST
>>
>>> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
>>> +			     "rxq->rx_free_thresh=%d, "
>>> +			     "RTE_PMD_NGBE_RX_MAX_BURST=%d",
>>
>> Do not split format string.
>>
>>> +			     rxq->rx_free_thresh, RTE_PMD_NGBE_RX_MAX_BURST);
>>> +		ret = -EINVAL;
>>> +	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
>>
>> rxq->rx_free_thresh >= rxq->nb_rx_desc
>> is simpler to read
>>
>>> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
>>> +			     "rxq->rx_free_thresh=%d, "
>>> +			     "rxq->nb_rx_desc=%d",
>>
>> Do not split format string.
>>
>>> +			     rxq->rx_free_thresh, rxq->nb_rx_desc);
>>> +		ret = -EINVAL;
>>> +	} else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
>>
>> (rxq->nb_rx_desc % rxq->rx_free_thresh) != 0 is easier to read
>>
>>> +		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
>>> +			     "rxq->nb_rx_desc=%d, "
>>> +			     "rxq->rx_free_thresh=%d",
>>> +			     rxq->nb_rx_desc, rxq->rx_free_thresh);
>>
>> Do not split format string
>>
>>> +		ret = -EINVAL;
>>> +	}
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +/* Reset dynamic ngbe_rx_queue fields back to defaults */ static void
>>> +__rte_cold ngbe_reset_rx_queue(struct ngbe_adapter *adapter, struct
>>> +ngbe_rx_queue *rxq) {
>>> +	static const struct ngbe_rx_desc zeroed_desc = {
>>> +						{{0}, {0} }, {{0}, {0} } };
>>> +	unsigned int i;
>>> +	uint16_t len = rxq->nb_rx_desc;
>>> +
>>> +	/*
>>> +	 * By default, the Rx queue setup function allocates enough memory for
>>> +	 * NGBE_RING_DESC_MAX.  The Rx Burst bulk allocation function requires
>>> +	 * extra memory at the end of the descriptor ring to be zero'd out.
>>> +	 */
>>> +	if (adapter->rx_bulk_alloc_allowed)
>>> +		/* zero out extra memory */
>>> +		len += RTE_PMD_NGBE_RX_MAX_BURST;
>>> +
>>> +	/*
>>> +	 * Zero out HW ring memory. Zero out extra memory at the end of
>>> +	 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
>>> +	 * reads extra memory as zeros.
>>> +	 */
>>> +	for (i = 0; i < len; i++)
>>> +		rxq->rx_ring[i] = zeroed_desc;
>>> +
>>> +	/*
>>> +	 * initialize extra software ring entries. Space for these extra
>>> +	 * entries is always allocated
>>> +	 */
>>> +	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
>>> +	for (i = rxq->nb_rx_desc; i < len; ++i)
>>> +		rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
>>> +
>>> +	rxq->rx_nb_avail = 0;
>>> +	rxq->rx_next_avail = 0;
>>> +	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
>>> +	rxq->rx_tail = 0;
>>> +	rxq->nb_rx_hold = 0;
>>> +	rxq->pkt_first_seg = NULL;
>>> +	rxq->pkt_last_seg = NULL;
>>> +}
>>> +
>>> +int __rte_cold
>>> +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
>>> +			 uint16_t queue_idx,
>>> +			 uint16_t nb_desc,
>>> +			 unsigned int socket_id,
>>> +			 const struct rte_eth_rxconf *rx_conf,
>>> +			 struct rte_mempool *mp)
>>> +{
>>> +	const struct rte_memzone *rz;
>>> +	struct ngbe_rx_queue *rxq;
>>> +	struct ngbe_hw     *hw;
>>> +	uint16_t len;
>>> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
>>> +
>>> +	PMD_INIT_FUNC_TRACE();
>>> +	hw = ngbe_dev_hw(dev);
>>> +
>>> +	/*
>>> +	 * Validate number of receive descriptors.
>>> +	 * It must not exceed hardware maximum, and must be multiple
>>> +	 * of NGBE_ALIGN.
>>> +	 */
>>> +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
>>> +			nb_desc > NGBE_RING_DESC_MAX ||
>>> +			nb_desc < NGBE_RING_DESC_MIN) {
>>> +		return -EINVAL;
>>> +	}
>>
>> rte_eth_rx_queue_setup cares about it
>>
> 
> I don't quite understand.

ethdev does the check based dev_info provided by the driver:

if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
    nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
    nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
        RTE_ETHDEV_LOG(ERR,
            "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >=
%hu, and a product of %hu\n",
            nb_rx_desc, dev_info.rx_desc_lim.nb_max,
            dev_info.rx_desc_lim.nb_min,
            dev_info.rx_desc_lim.nb_align);
       return -EINVAL;
}
  
Jiawen Wu July 6, 2021, 7:53 a.m. UTC | #4
On July 5, 2021 5:08 PM, Andrew Rybchenko wrote:
> On 7/5/21 11:36 AM, Jiawen Wu wrote:
> > On July 3, 2021 12:36 AM, Andrew Rybchenko wrote:
> >> On 6/17/21 1:59 PM, Jiawen Wu wrote:
> >>> Setup device Rx queue and release Rx queue.
> >>>
> >>> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
> >>> ---
> >>>  drivers/net/ngbe/meson.build   |   1 +
> >>>  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
> >>> drivers/net/ngbe/ngbe_ethdev.h |  16 ++
> >>>  drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
> >>>  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
> >>>  5 files changed, 457 insertions(+), 1 deletion(-)  create mode
> >>> 100644 drivers/net/ngbe/ngbe_rxtx.c  create mode 100644
> >>> drivers/net/ngbe/ngbe_rxtx.h
> >>>
> >>> a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
> >>> index c952023e8b..e73606c5f3 100644
> >>> --- a/drivers/net/ngbe/ngbe_ethdev.c
> >>> +++ b/drivers/net/ngbe/ngbe_ethdev.c
> >>> @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[] = {
> >>>  	{ .vendor_id = 0, /* sentinel */ },  };
> >>>
> >>> +static const struct rte_eth_desc_lim rx_desc_lim = {
> >>> +	.nb_max = NGBE_RING_DESC_MAX,
> >>> +	.nb_min = NGBE_RING_DESC_MIN,
> >>> +	.nb_align = NGBE_RXD_ALIGN,
> >>> +};
> >>> +
> >>>  static const struct eth_dev_ops ngbe_eth_dev_ops;
> >>>
> >>>  static inline void
> >>> @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)
> >>> static int  ngbe_dev_info_get(struct rte_eth_dev *dev, struct
> >>> rte_eth_dev_info *dev_info)  {
> >>> -	RTE_SET_USED(dev);
> >>> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> >>> +
> >>> +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
> >>> +
> >>> +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
> >>> +		.rx_thresh = {
> >>> +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
> >>> +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
> >>> +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
> >>> +		},
> >>> +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
> >>> +		.rx_drop_en = 0,
> >>> +		.offloads = 0,
> >>> +	};
> >>> +
> >>> +	dev_info->rx_desc_lim = rx_desc_lim;
> >>>
<...>
> >>> +int __rte_cold
> >>> +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> >>> +			 uint16_t queue_idx,
> >>> +			 uint16_t nb_desc,
> >>> +			 unsigned int socket_id,
> >>> +			 const struct rte_eth_rxconf *rx_conf,
> >>> +			 struct rte_mempool *mp)
> >>> +{
> >>> +	const struct rte_memzone *rz;
> >>> +	struct ngbe_rx_queue *rxq;
> >>> +	struct ngbe_hw     *hw;
> >>> +	uint16_t len;
> >>> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
> >>> +
> >>> +	PMD_INIT_FUNC_TRACE();
> >>> +	hw = ngbe_dev_hw(dev);
> >>> +
> >>> +	/*
> >>> +	 * Validate number of receive descriptors.
> >>> +	 * It must not exceed hardware maximum, and must be multiple
> >>> +	 * of NGBE_ALIGN.
> >>> +	 */
> >>> +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
> >>> +			nb_desc > NGBE_RING_DESC_MAX ||
> >>> +			nb_desc < NGBE_RING_DESC_MIN) {
> >>> +		return -EINVAL;
> >>> +	}
> >>
> >> rte_eth_rx_queue_setup cares about it
> >>
> >
> > I don't quite understand.
> 
> ethdev does the check based dev_info provided by the driver:
> 
> if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
>     nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
>     nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
>         RTE_ETHDEV_LOG(ERR,
>             "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu,
> and a product of %hu\n",
>             nb_rx_desc, dev_info.rx_desc_lim.nb_max,
>             dev_info.rx_desc_lim.nb_min,
>             dev_info.rx_desc_lim.nb_align);
>        return -EINVAL;
> }

'dev_info.rx_desc_lim' was set up in function 'ngbe_dev_info_get' with the struct 'rx_desc_lim' defined.
So I think it is appropriate to check with macros. However, the log can be added.
  
Andrew Rybchenko July 6, 2021, 8:06 a.m. UTC | #5
On 7/6/21 10:53 AM, Jiawen Wu wrote:
> On July 5, 2021 5:08 PM, Andrew Rybchenko wrote:
>> On 7/5/21 11:36 AM, Jiawen Wu wrote:
>>> On July 3, 2021 12:36 AM, Andrew Rybchenko wrote:
>>>> On 6/17/21 1:59 PM, Jiawen Wu wrote:
>>>>> Setup device Rx queue and release Rx queue.
>>>>>
>>>>> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
>>>>> ---
>>>>>  drivers/net/ngbe/meson.build   |   1 +
>>>>>  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
>>>>> drivers/net/ngbe/ngbe_ethdev.h |  16 ++
>>>>>  drivers/net/ngbe/ngbe_rxtx.c   | 308 +++++++++++++++++++++++++++++++++
>>>>>  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
>>>>>  5 files changed, 457 insertions(+), 1 deletion(-)  create mode
>>>>> 100644 drivers/net/ngbe/ngbe_rxtx.c  create mode 100644
>>>>> drivers/net/ngbe/ngbe_rxtx.h
>>>>>
>>>>> a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
>>>>> index c952023e8b..e73606c5f3 100644
>>>>> --- a/drivers/net/ngbe/ngbe_ethdev.c
>>>>> +++ b/drivers/net/ngbe/ngbe_ethdev.c
>>>>> @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[] = {
>>>>>  	{ .vendor_id = 0, /* sentinel */ },  };
>>>>>
>>>>> +static const struct rte_eth_desc_lim rx_desc_lim = {
>>>>> +	.nb_max = NGBE_RING_DESC_MAX,
>>>>> +	.nb_min = NGBE_RING_DESC_MIN,
>>>>> +	.nb_align = NGBE_RXD_ALIGN,
>>>>> +};
>>>>> +
>>>>>  static const struct eth_dev_ops ngbe_eth_dev_ops;
>>>>>
>>>>>  static inline void
>>>>> @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)
>>>>> static int  ngbe_dev_info_get(struct rte_eth_dev *dev, struct
>>>>> rte_eth_dev_info *dev_info)  {
>>>>> -	RTE_SET_USED(dev);
>>>>> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
>>>>> +
>>>>> +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
>>>>> +
>>>>> +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
>>>>> +		.rx_thresh = {
>>>>> +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
>>>>> +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
>>>>> +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
>>>>> +		},
>>>>> +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
>>>>> +		.rx_drop_en = 0,
>>>>> +		.offloads = 0,
>>>>> +	};
>>>>> +
>>>>> +	dev_info->rx_desc_lim = rx_desc_lim;
>>>>>
> <...>
>>>>> +int __rte_cold
>>>>> +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
>>>>> +			 uint16_t queue_idx,
>>>>> +			 uint16_t nb_desc,
>>>>> +			 unsigned int socket_id,
>>>>> +			 const struct rte_eth_rxconf *rx_conf,
>>>>> +			 struct rte_mempool *mp)
>>>>> +{
>>>>> +	const struct rte_memzone *rz;
>>>>> +	struct ngbe_rx_queue *rxq;
>>>>> +	struct ngbe_hw     *hw;
>>>>> +	uint16_t len;
>>>>> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
>>>>> +
>>>>> +	PMD_INIT_FUNC_TRACE();
>>>>> +	hw = ngbe_dev_hw(dev);
>>>>> +
>>>>> +	/*
>>>>> +	 * Validate number of receive descriptors.
>>>>> +	 * It must not exceed hardware maximum, and must be multiple
>>>>> +	 * of NGBE_ALIGN.
>>>>> +	 */
>>>>> +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
>>>>> +			nb_desc > NGBE_RING_DESC_MAX ||
>>>>> +			nb_desc < NGBE_RING_DESC_MIN) {
>>>>> +		return -EINVAL;
>>>>> +	}
>>>>
>>>> rte_eth_rx_queue_setup cares about it
>>>>
>>>
>>> I don't quite understand.
>>
>> ethdev does the check based dev_info provided by the driver:
>>
>> if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
>>     nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
>>     nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
>>         RTE_ETHDEV_LOG(ERR,
>>             "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu,
>> and a product of %hu\n",
>>             nb_rx_desc, dev_info.rx_desc_lim.nb_max,
>>             dev_info.rx_desc_lim.nb_min,
>>             dev_info.rx_desc_lim.nb_align);
>>        return -EINVAL;
>> }
> 
> 'dev_info.rx_desc_lim' was set up in function 'ngbe_dev_info_get' with the struct 'rx_desc_lim' defined.
> So I think it is appropriate to check with macros. However, the log can be 
Sorry, but I don't understand why do you want to duplicate
check which is already done in a generic code.
  
Jiawen Wu July 6, 2021, 8:33 a.m. UTC | #6
On July 6, 2021 4:06 PM, Andrew Rybchenko wrote:
> On 7/6/21 10:53 AM, Jiawen Wu wrote:
> > On July 5, 2021 5:08 PM, Andrew Rybchenko wrote:
> >> On 7/5/21 11:36 AM, Jiawen Wu wrote:
> >>> On July 3, 2021 12:36 AM, Andrew Rybchenko wrote:
> >>>> On 6/17/21 1:59 PM, Jiawen Wu wrote:
> >>>>> Setup device Rx queue and release Rx queue.
> >>>>>
> >>>>> Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
> >>>>> ---
> >>>>>  drivers/net/ngbe/meson.build   |   1 +
> >>>>>  drivers/net/ngbe/ngbe_ethdev.c |  37 +++-
> >>>>> drivers/net/ngbe/ngbe_ethdev.h |  16 ++
> >>>>>  drivers/net/ngbe/ngbe_rxtx.c   | 308
> +++++++++++++++++++++++++++++++++
> >>>>>  drivers/net/ngbe/ngbe_rxtx.h   |  96 ++++++++++
> >>>>>  5 files changed, 457 insertions(+), 1 deletion(-)  create mode
> >>>>> 100644 drivers/net/ngbe/ngbe_rxtx.c  create mode 100644
> >>>>> drivers/net/ngbe/ngbe_rxtx.h
> >>>>>
> >>>>> a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
> >>>>> index c952023e8b..e73606c5f3 100644
> >>>>> --- a/drivers/net/ngbe/ngbe_ethdev.c
> >>>>> +++ b/drivers/net/ngbe/ngbe_ethdev.c
> >>>>> @@ -37,6 +38,12 @@ static const struct rte_pci_id pci_id_ngbe_map[]
> = {
> >>>>>  	{ .vendor_id = 0, /* sentinel */ },  };
> >>>>>
> >>>>> +static const struct rte_eth_desc_lim rx_desc_lim = {
> >>>>> +	.nb_max = NGBE_RING_DESC_MAX,
> >>>>> +	.nb_min = NGBE_RING_DESC_MIN,
> >>>>> +	.nb_align = NGBE_RXD_ALIGN,
> >>>>> +};
> >>>>> +
> >>>>>  static const struct eth_dev_ops ngbe_eth_dev_ops;
> >>>>>
> >>>>>  static inline void
> >>>>> @@ -266,11 +280,30 @@ ngbe_dev_close(struct rte_eth_dev *dev)
> >>>>> static int  ngbe_dev_info_get(struct rte_eth_dev *dev, struct
> >>>>> rte_eth_dev_info *dev_info)  {
> >>>>> -	RTE_SET_USED(dev);
> >>>>> +	struct ngbe_hw *hw = ngbe_dev_hw(dev);
> >>>>> +
> >>>>> +	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
> >>>>> +
> >>>>> +	dev_info->default_rxconf = (struct rte_eth_rxconf) {
> >>>>> +		.rx_thresh = {
> >>>>> +			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
> >>>>> +			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
> >>>>> +			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
> >>>>> +		},
> >>>>> +		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
> >>>>> +		.rx_drop_en = 0,
> >>>>> +		.offloads = 0,
> >>>>> +	};
> >>>>> +
> >>>>> +	dev_info->rx_desc_lim = rx_desc_lim;
> >>>>>
> > <...>
> >>>>> +int __rte_cold
> >>>>> +ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
> >>>>> +			 uint16_t queue_idx,
> >>>>> +			 uint16_t nb_desc,
> >>>>> +			 unsigned int socket_id,
> >>>>> +			 const struct rte_eth_rxconf *rx_conf,
> >>>>> +			 struct rte_mempool *mp)
> >>>>> +{
> >>>>> +	const struct rte_memzone *rz;
> >>>>> +	struct ngbe_rx_queue *rxq;
> >>>>> +	struct ngbe_hw     *hw;
> >>>>> +	uint16_t len;
> >>>>> +	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
> >>>>> +
> >>>>> +	PMD_INIT_FUNC_TRACE();
> >>>>> +	hw = ngbe_dev_hw(dev);
> >>>>> +
> >>>>> +	/*
> >>>>> +	 * Validate number of receive descriptors.
> >>>>> +	 * It must not exceed hardware maximum, and must be multiple
> >>>>> +	 * of NGBE_ALIGN.
> >>>>> +	 */
> >>>>> +	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
> >>>>> +			nb_desc > NGBE_RING_DESC_MAX ||
> >>>>> +			nb_desc < NGBE_RING_DESC_MIN) {
> >>>>> +		return -EINVAL;
> >>>>> +	}
> >>>>
> >>>> rte_eth_rx_queue_setup cares about it
> >>>>
> >>>
> >>> I don't quite understand.
> >>
> >> ethdev does the check based dev_info provided by the driver:
> >>
> >> if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
> >>     nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
> >>     nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
> >>         RTE_ETHDEV_LOG(ERR,
> >>             "Invalid value for nb_rx_desc(=%hu), should be: <= %hu,
> >> >= %hu, and a product of %hu\n",
> >>             nb_rx_desc, dev_info.rx_desc_lim.nb_max,
> >>             dev_info.rx_desc_lim.nb_min,
> >>             dev_info.rx_desc_lim.nb_align);
> >>        return -EINVAL;
> >> }
> >
> > 'dev_info.rx_desc_lim' was set up in function 'ngbe_dev_info_get' with the
> struct 'rx_desc_lim' defined.
> > So I think it is appropriate to check with macros. However, the log
> > can be
> Sorry, but I don't understand why do you want to duplicate check which is
> already done in a generic code.

This check can be omitted, thanks.
NGBE driver code was ported from some earlier code, there may some redundant code left.
  

Patch

diff --git a/drivers/net/ngbe/meson.build b/drivers/net/ngbe/meson.build
index 81173fa7f0..9e75b82f1c 100644
--- a/drivers/net/ngbe/meson.build
+++ b/drivers/net/ngbe/meson.build
@@ -12,6 +12,7 @@  objs = [base_objs]
 
 sources = files(
 	'ngbe_ethdev.c',
+	'ngbe_rxtx.c',
 )
 
 includes += include_directories('base')
diff --git a/drivers/net/ngbe/ngbe_ethdev.c b/drivers/net/ngbe/ngbe_ethdev.c
index c952023e8b..e73606c5f3 100644
--- a/drivers/net/ngbe/ngbe_ethdev.c
+++ b/drivers/net/ngbe/ngbe_ethdev.c
@@ -12,6 +12,7 @@ 
 #include "ngbe_logs.h"
 #include "base/ngbe.h"
 #include "ngbe_ethdev.h"
+#include "ngbe_rxtx.h"
 
 static int ngbe_dev_close(struct rte_eth_dev *dev);
 
@@ -37,6 +38,12 @@  static const struct rte_pci_id pci_id_ngbe_map[] = {
 	{ .vendor_id = 0, /* sentinel */ },
 };
 
+static const struct rte_eth_desc_lim rx_desc_lim = {
+	.nb_max = NGBE_RING_DESC_MAX,
+	.nb_min = NGBE_RING_DESC_MIN,
+	.nb_align = NGBE_RXD_ALIGN,
+};
+
 static const struct eth_dev_ops ngbe_eth_dev_ops;
 
 static inline void
@@ -241,12 +248,19 @@  static int
 ngbe_dev_configure(struct rte_eth_dev *dev)
 {
 	struct ngbe_interrupt *intr = ngbe_dev_intr(dev);
+	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
 
 	PMD_INIT_FUNC_TRACE();
 
 	/* set flag to update link status after init */
 	intr->flags |= NGBE_FLAG_NEED_LINK_UPDATE;
 
+	/*
+	 * Initialize to TRUE. If any of Rx queues doesn't meet the bulk
+	 * allocation Rx preconditions we will reset it.
+	 */
+	adapter->rx_bulk_alloc_allowed = true;
+
 	return 0;
 }
 
@@ -266,11 +280,30 @@  ngbe_dev_close(struct rte_eth_dev *dev)
 static int
 ngbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
-	RTE_SET_USED(dev);
+	struct ngbe_hw *hw = ngbe_dev_hw(dev);
+
+	dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues;
+
+	dev_info->default_rxconf = (struct rte_eth_rxconf) {
+		.rx_thresh = {
+			.pthresh = NGBE_DEFAULT_RX_PTHRESH,
+			.hthresh = NGBE_DEFAULT_RX_HTHRESH,
+			.wthresh = NGBE_DEFAULT_RX_WTHRESH,
+		},
+		.rx_free_thresh = NGBE_DEFAULT_RX_FREE_THRESH,
+		.rx_drop_en = 0,
+		.offloads = 0,
+	};
+
+	dev_info->rx_desc_lim = rx_desc_lim;
 
 	dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_100M |
 				ETH_LINK_SPEED_10M;
 
+	/* Driver-preferred Rx/Tx parameters */
+	dev_info->default_rxportconf.nb_queues = 1;
+	dev_info->default_rxportconf.ring_size = 256;
+
 	return 0;
 }
 
@@ -570,6 +603,8 @@  static const struct eth_dev_ops ngbe_eth_dev_ops = {
 	.dev_configure              = ngbe_dev_configure,
 	.dev_infos_get              = ngbe_dev_info_get,
 	.link_update                = ngbe_dev_link_update,
+	.rx_queue_setup             = ngbe_dev_rx_queue_setup,
+	.rx_queue_release           = ngbe_dev_rx_queue_release,
 };
 
 RTE_PMD_REGISTER_PCI(net_ngbe, rte_ngbe_pmd);
diff --git a/drivers/net/ngbe/ngbe_ethdev.h b/drivers/net/ngbe/ngbe_ethdev.h
index b67508a3de..6580d288c8 100644
--- a/drivers/net/ngbe/ngbe_ethdev.h
+++ b/drivers/net/ngbe/ngbe_ethdev.h
@@ -30,6 +30,7 @@  struct ngbe_interrupt {
 struct ngbe_adapter {
 	struct ngbe_hw             hw;
 	struct ngbe_interrupt      intr;
+	bool rx_bulk_alloc_allowed;
 };
 
 static inline struct ngbe_adapter *
@@ -58,6 +59,13 @@  ngbe_dev_intr(struct rte_eth_dev *dev)
 	return intr;
 }
 
+void ngbe_dev_rx_queue_release(void *rxq);
+
+int  ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+
 int
 ngbe_dev_link_update_share(struct rte_eth_dev *dev,
 		int wait_to_complete);
@@ -66,4 +74,12 @@  ngbe_dev_link_update_share(struct rte_eth_dev *dev,
 #define NGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
 #define NGBE_VMDQ_NUM_UC_MAC         4096 /* Maximum nb. of UC MAC addr. */
 
+/*
+ *  Default values for Rx/Tx configuration
+ */
+#define NGBE_DEFAULT_RX_FREE_THRESH  32
+#define NGBE_DEFAULT_RX_PTHRESH      8
+#define NGBE_DEFAULT_RX_HTHRESH      8
+#define NGBE_DEFAULT_RX_WTHRESH      0
+
 #endif /* _NGBE_ETHDEV_H_ */
diff --git a/drivers/net/ngbe/ngbe_rxtx.c b/drivers/net/ngbe/ngbe_rxtx.c
new file mode 100644
index 0000000000..df0b64dc01
--- /dev/null
+++ b/drivers/net/ngbe/ngbe_rxtx.c
@@ -0,0 +1,308 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+
+#include <sys/queue.h>
+
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "ngbe_logs.h"
+#include "base/ngbe.h"
+#include "ngbe_ethdev.h"
+#include "ngbe_rxtx.h"
+
+/**
+ * ngbe_free_sc_cluster - free the not-yet-completed scattered cluster
+ *
+ * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
+ * in the sw_sc_ring is not set to NULL but rather points to the next
+ * mbuf of this RSC aggregation (that has not been completed yet and still
+ * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
+ * will just free first "nb_segs" segments of the cluster explicitly by calling
+ * an rte_pktmbuf_free_seg().
+ *
+ * @m scattered cluster head
+ */
+static void __rte_cold
+ngbe_free_sc_cluster(struct rte_mbuf *m)
+{
+	uint16_t i, nb_segs = m->nb_segs;
+	struct rte_mbuf *next_seg;
+
+	for (i = 0; i < nb_segs; i++) {
+		next_seg = m->next;
+		rte_pktmbuf_free_seg(m);
+		m = next_seg;
+	}
+}
+
+static void __rte_cold
+ngbe_rx_queue_release_mbufs(struct ngbe_rx_queue *rxq)
+{
+	unsigned int i;
+
+	if (rxq->sw_ring != NULL) {
+		for (i = 0; i < rxq->nb_rx_desc; i++) {
+			if (rxq->sw_ring[i].mbuf != NULL) {
+				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+				rxq->sw_ring[i].mbuf = NULL;
+			}
+		}
+		if (rxq->rx_nb_avail) {
+			for (i = 0; i < rxq->rx_nb_avail; ++i) {
+				struct rte_mbuf *mb;
+
+				mb = rxq->rx_stage[rxq->rx_next_avail + i];
+				rte_pktmbuf_free_seg(mb);
+			}
+			rxq->rx_nb_avail = 0;
+		}
+	}
+
+	if (rxq->sw_sc_ring != NULL)
+		for (i = 0; i < rxq->nb_rx_desc; i++)
+			if (rxq->sw_sc_ring[i].fbuf) {
+				ngbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
+				rxq->sw_sc_ring[i].fbuf = NULL;
+			}
+}
+
+static void __rte_cold
+ngbe_rx_queue_release(struct ngbe_rx_queue *rxq)
+{
+	if (rxq != NULL) {
+		ngbe_rx_queue_release_mbufs(rxq);
+		rte_free(rxq->sw_ring);
+		rte_free(rxq->sw_sc_ring);
+		rte_free(rxq);
+	}
+}
+
+void __rte_cold
+ngbe_dev_rx_queue_release(void *rxq)
+{
+	ngbe_rx_queue_release(rxq);
+}
+
+/*
+ * Check if Rx Burst Bulk Alloc function can be used.
+ * Return
+ *        0: the preconditions are satisfied and the bulk allocation function
+ *           can be used.
+ *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
+ *           function must be used.
+ */
+static inline int __rte_cold
+check_rx_burst_bulk_alloc_preconditions(struct ngbe_rx_queue *rxq)
+{
+	int ret = 0;
+
+	/*
+	 * Make sure the following pre-conditions are satisfied:
+	 *   rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST
+	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
+	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
+	 * Scattered packets are not supported.  This should be checked
+	 * outside of this function.
+	 */
+	if (!(rxq->rx_free_thresh >= RTE_PMD_NGBE_RX_MAX_BURST)) {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
+			     "rxq->rx_free_thresh=%d, "
+			     "RTE_PMD_NGBE_RX_MAX_BURST=%d",
+			     rxq->rx_free_thresh, RTE_PMD_NGBE_RX_MAX_BURST);
+		ret = -EINVAL;
+	} else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
+			     "rxq->rx_free_thresh=%d, "
+			     "rxq->nb_rx_desc=%d",
+			     rxq->rx_free_thresh, rxq->nb_rx_desc);
+		ret = -EINVAL;
+	} else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
+		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
+			     "rxq->nb_rx_desc=%d, "
+			     "rxq->rx_free_thresh=%d",
+			     rxq->nb_rx_desc, rxq->rx_free_thresh);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Reset dynamic ngbe_rx_queue fields back to defaults */
+static void __rte_cold
+ngbe_reset_rx_queue(struct ngbe_adapter *adapter, struct ngbe_rx_queue *rxq)
+{
+	static const struct ngbe_rx_desc zeroed_desc = {
+						{{0}, {0} }, {{0}, {0} } };
+	unsigned int i;
+	uint16_t len = rxq->nb_rx_desc;
+
+	/*
+	 * By default, the Rx queue setup function allocates enough memory for
+	 * NGBE_RING_DESC_MAX.  The Rx Burst bulk allocation function requires
+	 * extra memory at the end of the descriptor ring to be zero'd out.
+	 */
+	if (adapter->rx_bulk_alloc_allowed)
+		/* zero out extra memory */
+		len += RTE_PMD_NGBE_RX_MAX_BURST;
+
+	/*
+	 * Zero out HW ring memory. Zero out extra memory at the end of
+	 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
+	 * reads extra memory as zeros.
+	 */
+	for (i = 0; i < len; i++)
+		rxq->rx_ring[i] = zeroed_desc;
+
+	/*
+	 * initialize extra software ring entries. Space for these extra
+	 * entries is always allocated
+	 */
+	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
+	for (i = rxq->nb_rx_desc; i < len; ++i)
+		rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
+
+	rxq->rx_nb_avail = 0;
+	rxq->rx_next_avail = 0;
+	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
+	rxq->rx_tail = 0;
+	rxq->nb_rx_hold = 0;
+	rxq->pkt_first_seg = NULL;
+	rxq->pkt_last_seg = NULL;
+}
+
+int __rte_cold
+ngbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx,
+			 uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool *mp)
+{
+	const struct rte_memzone *rz;
+	struct ngbe_rx_queue *rxq;
+	struct ngbe_hw     *hw;
+	uint16_t len;
+	struct ngbe_adapter *adapter = ngbe_dev_adapter(dev);
+
+	PMD_INIT_FUNC_TRACE();
+	hw = ngbe_dev_hw(dev);
+
+	/*
+	 * Validate number of receive descriptors.
+	 * It must not exceed hardware maximum, and must be multiple
+	 * of NGBE_ALIGN.
+	 */
+	if (nb_desc % NGBE_RXD_ALIGN != 0 ||
+			nb_desc > NGBE_RING_DESC_MAX ||
+			nb_desc < NGBE_RING_DESC_MIN) {
+		return -EINVAL;
+	}
+
+	/* Free memory prior to re-allocation if needed... */
+	if (dev->data->rx_queues[queue_idx] != NULL) {
+		ngbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
+		dev->data->rx_queues[queue_idx] = NULL;
+	}
+
+	/* First allocate the Rx queue data structure */
+	rxq = rte_zmalloc_socket("ethdev RX queue",
+				 sizeof(struct ngbe_rx_queue),
+				 RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq == NULL)
+		return -ENOMEM;
+	rxq->mb_pool = mp;
+	rxq->nb_rx_desc = nb_desc;
+	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+	rxq->queue_id = queue_idx;
+	rxq->reg_idx = queue_idx;
+	rxq->port_id = dev->data->port_id;
+	rxq->drop_en = rx_conf->rx_drop_en;
+	rxq->rx_deferred_start = rx_conf->rx_deferred_start;
+
+	/*
+	 * Allocate Rx ring hardware descriptors. A memzone large enough to
+	 * handle the maximum ring size is allocated in order to allow for
+	 * resizing in later calls to the queue setup function.
+	 */
+	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
+				      RX_RING_SZ, NGBE_ALIGN, socket_id);
+	if (rz == NULL) {
+		ngbe_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Zero init all the descriptors in the ring.
+	 */
+	memset(rz->addr, 0, RX_RING_SZ);
+
+	rxq->rdt_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXWP(rxq->reg_idx));
+	rxq->rdh_reg_addr = NGBE_REG_ADDR(hw, NGBE_RXRP(rxq->reg_idx));
+
+	rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
+	rxq->rx_ring = (struct ngbe_rx_desc *)TMZ_VADDR(rz);
+
+	/*
+	 * Certain constraints must be met in order to use the bulk buffer
+	 * allocation Rx burst function. If any of Rx queues doesn't meet them
+	 * the feature should be disabled for the whole port.
+	 */
+	if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
+		PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
+				    "preconditions - canceling the feature for "
+				    "the whole port[%d]",
+			     rxq->queue_id, rxq->port_id);
+		adapter->rx_bulk_alloc_allowed = false;
+	}
+
+	/*
+	 * Allocate software ring. Allow for space at the end of the
+	 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
+	 * function does not access an invalid memory region.
+	 */
+	len = nb_desc;
+	if (adapter->rx_bulk_alloc_allowed)
+		len += RTE_PMD_NGBE_RX_MAX_BURST;
+
+	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+					  sizeof(struct ngbe_rx_entry) * len,
+					  RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->sw_ring == NULL) {
+		ngbe_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Always allocate even if it's not going to be needed in order to
+	 * simplify the code.
+	 *
+	 * This ring is used in Scattered Rx cases and Scattered Rx may
+	 * be requested in ngbe_dev_rx_init(), which is called later from
+	 * dev_start() flow.
+	 */
+	rxq->sw_sc_ring =
+		rte_zmalloc_socket("rxq->sw_sc_ring",
+				  sizeof(struct ngbe_scattered_rx_entry) * len,
+				  RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->sw_sc_ring == NULL) {
+		ngbe_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+
+	PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
+			    "dma_addr=0x%" PRIx64,
+		     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
+		     rxq->rx_ring_phys_addr);
+
+	dev->data->rx_queues[queue_idx] = rxq;
+
+	ngbe_reset_rx_queue(adapter, rxq);
+
+	return 0;
+}
+
diff --git a/drivers/net/ngbe/ngbe_rxtx.h b/drivers/net/ngbe/ngbe_rxtx.h
new file mode 100644
index 0000000000..92b9a9fd1b
--- /dev/null
+++ b/drivers/net/ngbe/ngbe_rxtx.h
@@ -0,0 +1,96 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018-2020 Beijing WangXun Technology Co., Ltd.
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+
+#ifndef _NGBE_RXTX_H_
+#define _NGBE_RXTX_H_
+
+/*****************************************************************************
+ * Receive Descriptor
+ *****************************************************************************/
+struct ngbe_rx_desc {
+	struct {
+		union {
+			rte_le32_t dw0;
+			struct {
+				rte_le16_t pkt;
+				rte_le16_t hdr;
+			} lo;
+		};
+		union {
+			rte_le32_t dw1;
+			struct {
+				rte_le16_t ipid;
+				rte_le16_t csum;
+			} hi;
+		};
+	} qw0; /* also as r.pkt_addr */
+	struct {
+		union {
+			rte_le32_t dw2;
+			struct {
+				rte_le32_t status;
+			} lo;
+		};
+		union {
+			rte_le32_t dw3;
+			struct {
+				rte_le16_t len;
+				rte_le16_t tag;
+			} hi;
+		};
+	} qw1; /* also as r.hdr_addr */
+};
+
+#define RTE_PMD_NGBE_RX_MAX_BURST 32
+
+#define RX_RING_SZ ((NGBE_RING_DESC_MAX + RTE_PMD_NGBE_RX_MAX_BURST) * \
+		    sizeof(struct ngbe_rx_desc))
+
+
+/**
+ * Structure associated with each descriptor of the Rx ring of a Rx queue.
+ */
+struct ngbe_rx_entry {
+	struct rte_mbuf *mbuf; /**< mbuf associated with Rx descriptor. */
+};
+
+struct ngbe_scattered_rx_entry {
+	struct rte_mbuf *fbuf; /**< First segment of the fragmented packet. */
+};
+
+/**
+ * Structure associated with each Rx queue.
+ */
+struct ngbe_rx_queue {
+	struct rte_mempool  *mb_pool; /**< mbuf pool to populate Rx ring. */
+	volatile struct ngbe_rx_desc *rx_ring; /**< Rx ring virtual address. */
+	uint64_t            rx_ring_phys_addr; /**< Rx ring DMA address. */
+	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
+	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
+	struct ngbe_rx_entry *sw_ring; /**< address of Rx software ring. */
+	/**< address of scattered Rx software ring. */
+	struct ngbe_scattered_rx_entry *sw_sc_ring;
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
+	uint16_t            nb_rx_desc; /**< number of Rx descriptors. */
+	uint16_t            rx_tail;  /**< current value of RDT register. */
+	uint16_t            nb_rx_hold; /**< number of held free Rx desc. */
+	uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
+	uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
+	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
+	uint16_t            rx_free_thresh; /**< max free Rx desc to hold. */
+	uint16_t            queue_id; /**< RX queue index. */
+	uint16_t            reg_idx;  /**< RX queue register index. */
+	/**< Packet type mask for different NICs. */
+	uint16_t            port_id;  /**< Device port identifier. */
+	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
+	uint8_t             rx_deferred_start; /**< not in global dev start. */
+	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
+	struct rte_mbuf fake_mbuf;
+	/** hold packets to return to application */
+	struct rte_mbuf *rx_stage[RTE_PMD_NGBE_RX_MAX_BURST * 2];
+};
+
+#endif /* _NGBE_RXTX_H_ */