[2/4] net/mlx5: enable MPRQ multi-stride operations
Checks
Commit Message
MPRQ feature should be updated to allow a packet to be received
into multiple strides in order to support the MTU exceeding 8KB.
Special care is needed to prevent the headroom corruption in the
multi-stride mode since the headroom space is borrowed by the PMD
from the tail of the preceding stride. Copy the whole packet into
a separate mbuf in this case or just the overlapping data if the
Rx scattering is supported by an application.
Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
---
drivers/net/mlx5/mlx5_rxq.c | 25 ++++------------
drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
3 files changed, 35 insertions(+), 60 deletions(-)
Comments
> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@mellanox.com>
> Sent: Wednesday, April 1, 2020 0:53
> To: dev@dpdk.org
> Cc: Raslan Darawsheh <rasland@mellanox.com>; Matan Azrad
> <matan@mellanox.com>; Slava Ovsiienko <viacheslavo@mellanox.com>;
> ferruh.yigit@intel.com; Thomas Monjalon <thomas@monjalon.net>
> Subject: [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations
>
> MPRQ feature should be updated to allow a packet to be received into
> multiple strides in order to support the MTU exceeding 8KB.
> Special care is needed to prevent the headroom corruption in the multi-stride
> mode since the headroom space is borrowed by the PMD from the tail of the
> preceding stride. Copy the whole packet into a separate mbuf in this case or
> just the overlapping data if the Rx scattering is supported by an application.
>
> Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
> drivers/net/mlx5/mlx5_rxq.c | 25 ++++------------
> drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
> drivers/net/mlx5/mlx5_rxtx.h | 2 +-
> 3 files changed, 35 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index
> 85fcfe6..a64f536 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1793,9 +1793,7 @@ struct mlx5_rxq_ctrl *
> struct mlx5_priv *priv = dev->data->dev_private;
> struct mlx5_rxq_ctrl *tmpl;
> unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> - unsigned int mprq_stride_size;
> struct mlx5_dev_config *config = &priv->config;
> - unsigned int strd_headroom_en;
> /*
> * Always allocate extra slots, even if eventually
> * the vector Rx will not be used.
> @@ -1841,27 +1839,13 @@ struct mlx5_rxq_ctrl *
> tmpl->socket = socket;
> if (dev->data->dev_conf.intr_conf.rxq)
> tmpl->irq = 1;
> - /*
> - * LRO packet may consume all the stride memory, hence we cannot
> - * guaranty head-room near the packet memory in the stride.
> - * In this case scatter is, for sure, enabled and an empty mbuf may be
> - * added in the start for the head-room.
> - */
> - if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
> - non_scatter_min_mbuf_size > mb_len) {
> - strd_headroom_en = 0;
> - mprq_stride_size = RTE_MIN(max_rx_pkt_len,
> - 1u << config-
> >mprq.max_stride_size_n);
> - } else {
> - strd_headroom_en = 1;
> - mprq_stride_size = non_scatter_min_mbuf_size;
> - }
> if (!config->mprq.stride_num_n)
> config->mprq.stride_num_n = MLX5_MPRQ_STRIDE_NUM_N;
> if (!config->mprq.stride_size_n)
> - config->mprq.stride_size_n = (mprq_stride_size <=
> + config->mprq.stride_size_n = (non_scatter_min_mbuf_size <=
> (1U << config->mprq.max_stride_size_n)) ?
> - log2above(mprq_stride_size) :
> MLX5_MPRQ_STRIDE_SIZE_N;
> + log2above(non_scatter_min_mbuf_size) :
> + MLX5_MPRQ_STRIDE_SIZE_N;
> /*
> * This Rx queue can be configured as a Multi-Packet RQ if all of the
> * following conditions are met:
> @@ -1877,7 +1861,8 @@ struct mlx5_rxq_ctrl *
> tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
> tmpl->rxq.strd_sz_n = config->mprq.stride_size_n;
> tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
> - tmpl->rxq.strd_headroom_en = strd_headroom_en;
> + tmpl->rxq.strd_scatter_en =
> + !!(offloads & DEV_RX_OFFLOAD_SCATTER);
> tmpl->rxq.mprq_max_memcpy_len =
> RTE_MIN(first_mb_free_size,
> config->mprq.max_memcpy_len);
> max_lro_size = RTE_MIN(max_rx_pkt_len, diff --git
> a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index
> f3bf763..4c27952 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
> unsigned int i = 0;
> uint32_t rq_ci = rxq->rq_ci;
> uint16_t consumed_strd = rxq->consumed_strd;
> - uint16_t headroom_sz = rxq->strd_headroom_en *
> RTE_PKTMBUF_HEADROOM;
> struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
>
> while (i < pkts_n) {
> struct rte_mbuf *pkt;
> void *addr;
> int ret;
> - unsigned int len;
> + uint32_t len;
> uint16_t strd_cnt;
> uint16_t strd_idx;
> uint32_t offset;
> uint32_t byte_cnt;
> + int32_t hdrm_overlap;
> volatile struct mlx5_mini_cqe8 *mcqe = NULL;
> uint32_t rss_hash_res = 0;
> - uint8_t lro_num_seg;
>
> if (consumed_strd == strd_n) {
> /* Replace WQE only if the buffer is still in use. */
> @@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
> MLX5_ASSERT(strd_idx < strd_n);
> MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
> wq_mask));
> - lro_num_seg = cqe->lro_num_seg;
> - /*
> - * Currently configured to receive a packet per a stride. But if
> - * MTU is adjusted through kernel interface, device could
> - * consume multiple strides without raising an error. In this
> - * case, the packet should be dropped because it is bigger
> than
> - * the max_rx_pkt_len.
> - */
> - if (unlikely(!lro_num_seg && strd_cnt > 1)) {
> - ++rxq->stats.idropped;
> - continue;
> - }
> pkt = rte_pktmbuf_alloc(rxq->mp);
> if (unlikely(pkt == NULL)) {
> ++rxq->stats.rx_nombuf;
> @@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
> len -= RTE_ETHER_CRC_LEN;
> offset = strd_idx * strd_sz + strd_shift;
> addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n),
> offset);
> + hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt *
> strd_sz;
> /*
> * Memcpy packets to the target mbuf if:
> * - The size of packet is smaller than
> mprq_max_memcpy_len.
> * - Out of buffer in the Mempool for Multi-Packet RQ.
> + * - There is no space for a headroom and scatter is disabled.
> */
> - if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl ==
> NULL) {
> + if (len <= rxq->mprq_max_memcpy_len ||
> + rxq->mprq_repl == NULL ||
> + (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
> /*
> * When memcpy'ing packet due to out-of-buffer, the
> * packet must be smaller than the target mbuf.
> @@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
> rte_atomic16_add_return(&buf->refcnt, 1);
> MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf-
> >refcnt) <=
> strd_n + 1);
> - buf_addr = RTE_PTR_SUB(addr, headroom_sz);
> + buf_addr = RTE_PTR_SUB(addr,
> RTE_PKTMBUF_HEADROOM);
> /*
> * MLX5 device doesn't use iova but it is necessary in a
> * case where the Rx packet is transmitted via a @@ -
> 1788,43 +1779,42 @@ enum mlx5_txcmp_code {
> rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
> buf_len, shinfo);
> /* Set mbuf head-room. */
> - pkt->data_off = headroom_sz;
> + SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
> MLX5_ASSERT(pkt->ol_flags ==
> EXT_ATTACHED_MBUF);
> - /*
> - * Prevent potential overflow due to MTU change
> through
> - * kernel interface.
> - */
> - if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
> - rte_pktmbuf_free_seg(pkt);
> - ++rxq->stats.idropped;
> - continue;
> - }
> + MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
> + len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
> DATA_LEN(pkt) = len;
> /*
> - * LRO packet may consume all the stride memory, in
> this
> - * case packet head-room space is not guaranteed so
> must
> - * to add an empty mbuf for the head-room.
> + * Copy the last fragment of a packet (up to
> headroom
> + * size bytes) in case there is a stride overlap with
> + * a next packet's headroom. Allocate a separate
> mbuf
> + * to store this fragment and link it. Scatter is on.
> */
> - if (!rxq->strd_headroom_en) {
> - struct rte_mbuf *headroom_mbuf =
> - rte_pktmbuf_alloc(rxq->mp);
> + if (hdrm_overlap > 0) {
> + MLX5_ASSERT(rxq->strd_scatter_en);
> + struct rte_mbuf *seg =
> + rte_pktmbuf_alloc(rxq->mp);
>
> - if (unlikely(headroom_mbuf == NULL)) {
> + if (unlikely(seg == NULL)) {
> rte_pktmbuf_free_seg(pkt);
> ++rxq->stats.rx_nombuf;
> break;
> }
> - PORT(pkt) = rxq->port_id;
> - NEXT(headroom_mbuf) = pkt;
> - pkt = headroom_mbuf;
> + SET_DATA_OFF(seg, 0);
> + rte_memcpy(rte_pktmbuf_mtod(seg, void *),
> + RTE_PTR_ADD(addr, len -
> hdrm_overlap),
> + hdrm_overlap);
> + DATA_LEN(seg) = hdrm_overlap;
> + DATA_LEN(pkt) = len - hdrm_overlap;
> + NEXT(pkt) = seg;
> NB_SEGS(pkt) = 2;
> }
> }
> rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
> - if (lro_num_seg > 1) {
> + if (cqe->lro_num_seg > 1) {
> mlx5_lro_update_hdr(addr, cqe, len);
> pkt->ol_flags |= PKT_RX_LRO;
> - pkt->tso_segsz = strd_sz;
> + pkt->tso_segsz = len / cqe->lro_num_seg;
> }
> PKT_LEN(pkt) = len;
> PORT(pkt) = rxq->port_id;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 939778a..d155c24 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -119,7 +119,7 @@ struct mlx5_rxq_data {
> unsigned int strd_sz_n:4; /* Log 2 of stride size. */
> unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
> unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
> - unsigned int strd_headroom_en:1; /* Enable mbuf headroom in
> MPRQ. */
> + unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
> unsigned int lro:1; /* Enable LRO. */
> unsigned int :1; /* Remaining bits. */
> volatile uint32_t *rq_db;
> --
> 1.8.3.1
@@ -1793,9 +1793,7 @@ struct mlx5_rxq_ctrl *
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
- unsigned int mprq_stride_size;
struct mlx5_dev_config *config = &priv->config;
- unsigned int strd_headroom_en;
/*
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
@@ -1841,27 +1839,13 @@ struct mlx5_rxq_ctrl *
tmpl->socket = socket;
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
- /*
- * LRO packet may consume all the stride memory, hence we cannot
- * guaranty head-room near the packet memory in the stride.
- * In this case scatter is, for sure, enabled and an empty mbuf may be
- * added in the start for the head-room.
- */
- if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
- non_scatter_min_mbuf_size > mb_len) {
- strd_headroom_en = 0;
- mprq_stride_size = RTE_MIN(max_rx_pkt_len,
- 1u << config->mprq.max_stride_size_n);
- } else {
- strd_headroom_en = 1;
- mprq_stride_size = non_scatter_min_mbuf_size;
- }
if (!config->mprq.stride_num_n)
config->mprq.stride_num_n = MLX5_MPRQ_STRIDE_NUM_N;
if (!config->mprq.stride_size_n)
- config->mprq.stride_size_n = (mprq_stride_size <=
+ config->mprq.stride_size_n = (non_scatter_min_mbuf_size <=
(1U << config->mprq.max_stride_size_n)) ?
- log2above(mprq_stride_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+ log2above(non_scatter_min_mbuf_size) :
+ MLX5_MPRQ_STRIDE_SIZE_N;
/*
* This Rx queue can be configured as a Multi-Packet RQ if all of the
* following conditions are met:
@@ -1877,7 +1861,8 @@ struct mlx5_rxq_ctrl *
tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
tmpl->rxq.strd_sz_n = config->mprq.stride_size_n;
tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
- tmpl->rxq.strd_headroom_en = strd_headroom_en;
+ tmpl->rxq.strd_scatter_en =
+ !!(offloads & DEV_RX_OFFLOAD_SCATTER);
tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
config->mprq.max_memcpy_len);
max_lro_size = RTE_MIN(max_rx_pkt_len,
@@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
unsigned int i = 0;
uint32_t rq_ci = rxq->rq_ci;
uint16_t consumed_strd = rxq->consumed_strd;
- uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
while (i < pkts_n) {
struct rte_mbuf *pkt;
void *addr;
int ret;
- unsigned int len;
+ uint32_t len;
uint16_t strd_cnt;
uint16_t strd_idx;
uint32_t offset;
uint32_t byte_cnt;
+ int32_t hdrm_overlap;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
uint32_t rss_hash_res = 0;
- uint8_t lro_num_seg;
if (consumed_strd == strd_n) {
/* Replace WQE only if the buffer is still in use. */
@@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
MLX5_ASSERT(strd_idx < strd_n);
MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
wq_mask));
- lro_num_seg = cqe->lro_num_seg;
- /*
- * Currently configured to receive a packet per a stride. But if
- * MTU is adjusted through kernel interface, device could
- * consume multiple strides without raising an error. In this
- * case, the packet should be dropped because it is bigger than
- * the max_rx_pkt_len.
- */
- if (unlikely(!lro_num_seg && strd_cnt > 1)) {
- ++rxq->stats.idropped;
- continue;
- }
pkt = rte_pktmbuf_alloc(rxq->mp);
if (unlikely(pkt == NULL)) {
++rxq->stats.rx_nombuf;
@@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
len -= RTE_ETHER_CRC_LEN;
offset = strd_idx * strd_sz + strd_shift;
addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+ hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
/*
* Memcpy packets to the target mbuf if:
* - The size of packet is smaller than mprq_max_memcpy_len.
* - Out of buffer in the Mempool for Multi-Packet RQ.
+ * - There is no space for a headroom and scatter is disabled.
*/
- if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+ if (len <= rxq->mprq_max_memcpy_len ||
+ rxq->mprq_repl == NULL ||
+ (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
/*
* When memcpy'ing packet due to out-of-buffer, the
* packet must be smaller than the target mbuf.
@@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
rte_atomic16_add_return(&buf->refcnt, 1);
MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <=
strd_n + 1);
- buf_addr = RTE_PTR_SUB(addr, headroom_sz);
+ buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
/*
* MLX5 device doesn't use iova but it is necessary in a
* case where the Rx packet is transmitted via a
@@ -1788,43 +1779,42 @@ enum mlx5_txcmp_code {
rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
buf_len, shinfo);
/* Set mbuf head-room. */
- pkt->data_off = headroom_sz;
+ SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
- /*
- * Prevent potential overflow due to MTU change through
- * kernel interface.
- */
- if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
- rte_pktmbuf_free_seg(pkt);
- ++rxq->stats.idropped;
- continue;
- }
+ MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
+ len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
DATA_LEN(pkt) = len;
/*
- * LRO packet may consume all the stride memory, in this
- * case packet head-room space is not guaranteed so must
- * to add an empty mbuf for the head-room.
+ * Copy the last fragment of a packet (up to headroom
+ * size bytes) in case there is a stride overlap with
+ * a next packet's headroom. Allocate a separate mbuf
+ * to store this fragment and link it. Scatter is on.
*/
- if (!rxq->strd_headroom_en) {
- struct rte_mbuf *headroom_mbuf =
- rte_pktmbuf_alloc(rxq->mp);
+ if (hdrm_overlap > 0) {
+ MLX5_ASSERT(rxq->strd_scatter_en);
+ struct rte_mbuf *seg =
+ rte_pktmbuf_alloc(rxq->mp);
- if (unlikely(headroom_mbuf == NULL)) {
+ if (unlikely(seg == NULL)) {
rte_pktmbuf_free_seg(pkt);
++rxq->stats.rx_nombuf;
break;
}
- PORT(pkt) = rxq->port_id;
- NEXT(headroom_mbuf) = pkt;
- pkt = headroom_mbuf;
+ SET_DATA_OFF(seg, 0);
+ rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+ RTE_PTR_ADD(addr, len - hdrm_overlap),
+ hdrm_overlap);
+ DATA_LEN(seg) = hdrm_overlap;
+ DATA_LEN(pkt) = len - hdrm_overlap;
+ NEXT(pkt) = seg;
NB_SEGS(pkt) = 2;
}
}
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
- if (lro_num_seg > 1) {
+ if (cqe->lro_num_seg > 1) {
mlx5_lro_update_hdr(addr, cqe, len);
pkt->ol_flags |= PKT_RX_LRO;
- pkt->tso_segsz = strd_sz;
+ pkt->tso_segsz = len / cqe->lro_num_seg;
}
PKT_LEN(pkt) = len;
PORT(pkt) = rxq->port_id;
@@ -119,7 +119,7 @@ struct mlx5_rxq_data {
unsigned int strd_sz_n:4; /* Log 2 of stride size. */
unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
- unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+ unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
unsigned int lro:1; /* Enable LRO. */
unsigned int :1; /* Remaining bits. */
volatile uint32_t *rq_db;