On 9/9/2020 4:52 PM, Lance Richardson wrote:
> Improve support for small ring sizes:
> - Ensure that transmit free threshold is no more than 1/4 ring size.
> - Ensure that receive free threshold is no more than 1/4 ring size.
> - Validate requested ring sizes against minimum supported size.
> - Use rxq receive free threshold instead of fixed maximum burst
> size to trigger bulk receive buffer allocation.
>
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> ---
> drivers/net/bnxt/bnxt_rxq.c | 6 +++--
> drivers/net/bnxt/bnxt_rxtx_vec_common.h | 10 +++++----
> drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 29 +++++++++++++------------
> drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 29 +++++++++++++------------
> drivers/net/bnxt/bnxt_txq.c | 7 ++++--
> 5 files changed, 45 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
> index db9aa1f3ed..4ef3b5cb5c 100644
> --- a/drivers/net/bnxt/bnxt_rxq.c
> +++ b/drivers/net/bnxt/bnxt_rxq.c
> @@ -14,6 +14,7 @@
> #include "bnxt_rxq.h"
> #include "bnxt_rxr.h"
> #include "bnxt_vnic.h"
> +#include "bnxt_rxtx_vec_common.h"
> #include "hsi_struct_def_dpdk.h"
>
Hi Lance, Ajit, Somnath,
Raslan reported a build error for the PPC architecture. This happens because
static inline functions in 'bnxt_rxtx_vec_common.h' use
'rxq->rxrearm_nb' & 'rxq->rxrearm_start', which are not defined for PPC.
As far as I can see, 'bnxt_rxtx_vec_common.h' is included because of
some macros.
A quick fix would be to wrap all static inline functions with !PPC checks,
but from the name of the header file, it looks like it shouldn't be
included by scalar datapath .c files in the first place.
Instead, those macros could be extracted into another header that
both these .c files and 'bnxt_rxtx_vec_common.h' include.
Or 'bnxt_txq.h' & 'bnxt_txr.h' could be used to hold those macros, since
those headers are already included by 'bnxt_rxtx_vec_common.h' and the .c
files.
Anyway, can you please provide the fix as incremental patches on top of
the latest head, so I can squash them into the original patches?
If the patches can't be done as incremental fixes, or if they will be delayed,
I will need to drop the patchset from next-net, so as not to block any
possible pull from the main repo.
On Mon, Sep 14, 2020 at 6:03 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
<snip>
>
> Hi Lance, Ajit, Somnath,
>
> Raslan reported a build error for PPC architecture, this happens because
> static inline functions in 'bnxt_rxtx_vec_common.h' are using
> 'rxq->rxrearm_nb' & 'rxq->rxrearm_start' which are not defined for PPC.
>
> As far as I can see the 'bnxt_rxtx_vec_common.h' is included because of
> some macros.
> A quick fix can be to wrap all static inline functions with !PPC checks
> but from the name of the header file, it looks like it shouldn't be
> included by scalar datapath .c files at first place.
>
> Instead it can be possible to extract those macros into another header
> and both these .c file and 'bnxt_rxtx_vec_common.h' can include it.
> Or 'bnxt_txq.h' & 'bnxt_txr.h' can be used to hold those macros, and
> those headers were already included by 'bnxt_rxtx_vec_common.h' and .c
> files.
>
>
> Anyway can you please provide the fix as incremental patches on top of
> latest head, so I can squash them into original patches.
>
> If the patches can't be done as incremental fixes or if they will delay,
> I will need to drop the patchset from next-net, to not block any
> possible pull from main repo.
Hi Ferruh,
I just sent a fix for this based on the head of next-net, compile-tested
on x86_64, arm64, and powerpc. Please let me know if you run into
any further snags.
Thanks,
Lance
@@ -14,6 +14,7 @@
#include "bnxt_rxq.h"
#include "bnxt_rxr.h"
#include "bnxt_vnic.h"
+#include "bnxt_rxtx_vec_common.h"
#include "hsi_struct_def_dpdk.h"
/*
@@ -305,7 +306,7 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
return -EINVAL;
}
- if (!nb_desc || nb_desc > MAX_RX_DESC_CNT) {
+ if (nb_desc < BNXT_MIN_RING_DESC || nb_desc > MAX_RX_DESC_CNT) {
PMD_DRV_LOG(ERR, "nb_desc %d is invalid\n", nb_desc);
rc = -EINVAL;
goto out;
@@ -326,7 +327,8 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
rxq->bp = bp;
rxq->mb_pool = mp;
rxq->nb_rx_desc = nb_desc;
- rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+ rxq->rx_free_thresh =
+ RTE_MIN(rte_align32pow2(nb_desc) / 4, RTE_BNXT_MAX_RX_BURST);
PMD_DRV_LOG(DEBUG, "RX Buf MTU %d\n", eth_dev->data->mtu);
@@ -5,11 +5,13 @@
#ifndef _BNXT_RXTX_VEC_COMMON_H_
#define _BNXT_RXTX_VEC_COMMON_H_
+#include "hsi_struct_def_dpdk.h"
+#include "bnxt_rxq.h"
+#include "bnxt_rxr.h"
-#define RTE_BNXT_MAX_RX_BURST 32
-#define RTE_BNXT_MAX_TX_BURST 32
-#define RTE_BNXT_RXQ_REARM_THRESH 32
-#define RTE_BNXT_DESCS_PER_LOOP 4
+#define RTE_BNXT_MAX_RX_BURST 32U
+#define RTE_BNXT_MAX_TX_BURST 32U
+#define RTE_BNXT_DESCS_PER_LOOP 4U
#define TX_BD_FLAGS_CMPL ((1 << TX_BD_LONG_FLAGS_BD_CNT_SFT) | \
TX_BD_SHORT_FLAGS_COAL_NOW | \
@@ -13,9 +13,6 @@
#include "bnxt.h"
#include "bnxt_cpr.h"
#include "bnxt_ring.h"
-#include "bnxt_rxr.h"
-#include "bnxt_rxq.h"
-#include "hsi_struct_def_dpdk.h"
#include "bnxt_rxtx_vec_common.h"
#include "bnxt_txq.h"
@@ -31,23 +28,27 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
struct rte_mbuf *mb0, *mb1;
- int i;
+ int nb, i;
const uint64x2_t hdr_room = {0, RTE_PKTMBUF_HEADROOM};
const uint64x2_t addrmask = {0, UINT64_MAX};
- /* Pull RTE_BNXT_RXQ_REARM_THRESH more mbufs into the software ring */
- if (rte_mempool_get_bulk(rxq->mb_pool,
- (void *)rx_bufs,
- RTE_BNXT_RXQ_REARM_THRESH) < 0) {
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_BNXT_RXQ_REARM_THRESH;
+ /*
+ * Number of mbufs to allocate must be a multiple of two. The
+ * allocation must not go past the end of the ring.
+ */
+ nb = RTE_MIN(rxq->rxrearm_nb & ~0x1,
+ rxq->nb_rx_desc - rxq->rxrearm_start);
+
+ /* Allocate new mbufs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rx_bufs, nb) < 0) {
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += nb;
return;
}
/* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < RTE_BNXT_RXQ_REARM_THRESH; i += 2, rx_bufs += 2) {
+ for (i = 0; i < nb; i += 2, rx_bufs += 2) {
uint64x2_t buf_addr0, buf_addr1;
uint64x2_t rxbd0, rxbd1;
@@ -83,12 +84,12 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
vst1q_u64((uint64_t *)(rxbds++), rxbd1);
}
- rxq->rxrearm_start += RTE_BNXT_RXQ_REARM_THRESH;
+ rxq->rxrearm_start += nb;
bnxt_db_write(&rxr->rx_db, rxq->rxrearm_start - 1);
if (rxq->rxrearm_start >= rxq->nb_rx_desc)
rxq->rxrearm_start = 0;
- rxq->rxrearm_nb -= RTE_BNXT_RXQ_REARM_THRESH;
+ rxq->rxrearm_nb -= nb;
}
static uint32_t
@@ -220,7 +221,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
if (unlikely(!rxq->rx_started))
return 0;
- if (rxq->rxrearm_nb >= RTE_BNXT_RXQ_REARM_THRESH)
+ if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
bnxt_rxq_rearm(rxq, rxr);
/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
@@ -17,9 +17,6 @@
#include "bnxt.h"
#include "bnxt_cpr.h"
#include "bnxt_ring.h"
-#include "bnxt_rxr.h"
-#include "bnxt_rxq.h"
-#include "hsi_struct_def_dpdk.h"
#include "bnxt_rxtx_vec_common.h"
#include "bnxt_txq.h"
@@ -35,23 +32,27 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
struct rte_mbuf *mb0, *mb1;
- int i;
+ int nb, i;
const __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, 0);
const __m128i addrmask = _mm_set_epi64x(UINT64_MAX, 0);
- /* Pull RTE_BNXT_RXQ_REARM_THRESH more mbufs into the software ring */
- if (rte_mempool_get_bulk(rxq->mb_pool,
- (void *)rx_bufs,
- RTE_BNXT_RXQ_REARM_THRESH) < 0) {
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_BNXT_RXQ_REARM_THRESH;
+ /*
+ * Number of mbufs to allocate must be a multiple of two. The
+ * allocation must not go past the end of the ring.
+ */
+ nb = RTE_MIN(rxq->rxrearm_nb & ~0x1,
+ rxq->nb_rx_desc - rxq->rxrearm_start);
+
+ /* Allocate new mbufs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rx_bufs, nb) < 0) {
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += nb;
return;
}
/* Initialize the mbufs in vector, process 2 mbufs in one loop */
- for (i = 0; i < RTE_BNXT_RXQ_REARM_THRESH; i += 2, rx_bufs += 2) {
+ for (i = 0; i < nb; i += 2, rx_bufs += 2) {
__m128i buf_addr0, buf_addr1;
__m128i rxbd0, rxbd1;
@@ -87,12 +88,12 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
_mm_store_si128((__m128i *)(rxbds++), rxbd1);
}
- rxq->rxrearm_start += RTE_BNXT_RXQ_REARM_THRESH;
+ rxq->rxrearm_start += nb;
bnxt_db_write(&rxr->rx_db, rxq->rxrearm_start - 1);
if (rxq->rxrearm_start >= rxq->nb_rx_desc)
rxq->rxrearm_start = 0;
- rxq->rxrearm_nb -= RTE_BNXT_RXQ_REARM_THRESH;
+ rxq->rxrearm_nb -= nb;
}
static uint32_t
@@ -223,7 +224,7 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
if (unlikely(!rxq->rx_started))
return 0;
- if (rxq->rxrearm_nb >= RTE_BNXT_RXQ_REARM_THRESH)
+ if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
bnxt_rxq_rearm(rxq, rxr);
/* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
@@ -11,6 +11,7 @@
#include "bnxt_ring.h"
#include "bnxt_txq.h"
#include "bnxt_txr.h"
+#include "bnxt_rxtx_vec_common.h"
/*
* TX Queues
@@ -97,7 +98,7 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
return -EINVAL;
}
- if (!nb_desc || nb_desc > MAX_TX_DESC_CNT) {
+ if (nb_desc < BNXT_MIN_RING_DESC || nb_desc > MAX_TX_DESC_CNT) {
PMD_DRV_LOG(ERR, "nb_desc %d is invalid", nb_desc);
rc = -EINVAL;
goto out;
@@ -129,7 +130,9 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
}
txq->bp = bp;
txq->nb_tx_desc = nb_desc;
- txq->tx_free_thresh = tx_conf->tx_free_thresh;
+ txq->tx_free_thresh =
+ RTE_MIN(rte_align32pow2(nb_desc) / 4, RTE_BNXT_MAX_TX_BURST);
+
txq->tx_deferred_start = tx_conf->tx_deferred_start;
rc = bnxt_init_tx_ring_struct(txq, socket_id);