[v4,13/14] net/mlx5: support shared Rx queue

Message ID 20211104123320.1638915-14-xuemingl@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: support shared Rx queue |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Xueming Li Nov. 4, 2021, 12:33 p.m. UTC
  This patch introduces shared RxQ. All shared Rx queues with same group
and queue ID share the same rxq_ctrl. Rxq_ctrl and rxq_data are shared,
all queues from different member port share same WQ and CQ, essentially
one Rx WQ, mbufs are filled into this singleton WQ.

Shared rxq_data is set into device Rx queues of all member ports as
RxQ object, used for receiving packets. Polling queue of any member
ports returns packets of any member, mbuf->port is used to identify
source port.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Slava Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/nics/features/mlx5.ini   |   1 +
 doc/guides/nics/mlx5.rst            |   6 +
 drivers/net/mlx5/linux/mlx5_os.c    |   2 +
 drivers/net/mlx5/linux/mlx5_verbs.c |   8 +-
 drivers/net/mlx5/mlx5.h             |   2 +
 drivers/net/mlx5/mlx5_devx.c        |  46 +++--
 drivers/net/mlx5/mlx5_ethdev.c      |   5 +
 drivers/net/mlx5/mlx5_rx.h          |   3 +
 drivers/net/mlx5/mlx5_rxq.c         | 273 ++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_trigger.c     |  61 ++++---
 10 files changed, 329 insertions(+), 78 deletions(-)
  

Patch

diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index 403f58cd7e2..7cbd11bb160 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -11,6 +11,7 @@  Removal event        = Y
 Rx interrupt         = Y
 Fast mbuf free       = Y
 Queue start/stop     = Y
+Shared Rx queue      = Y
 Burst mode info      = Y
 Power mgmt address monitor = Y
 MTU update           = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index bb92520dff4..824971d89ae 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -113,6 +113,7 @@  Features
 - Connection tracking.
 - Sub-Function representors.
 - Sub-Function.
+- Shared Rx queue.
 
 
 Limitations
@@ -465,6 +466,11 @@  Limitations
   - In order to achieve best insertion rate, application should manage the flows per lcore.
   - Better to disable memory reclaim by setting ``reclaim_mem_mode`` to 0 to accelerate the flow object allocation and release with cache.
 
+ Shared Rx queue:
+
+  - Counters of received packets and bytes number of devices in same share group are same.
+  - Counters of received packets and bytes number of queues in same group and queue ID are same.
+
 - HW hashed bonding
 
   - TXQ affinity subjects to HW hash once enabled.
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index f51da8c3a38..e0304b685e5 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -420,6 +420,7 @@  mlx5_alloc_shared_dr(struct mlx5_priv *priv)
 			mlx5_glue->dr_create_flow_action_default_miss();
 	if (!sh->default_miss_action)
 		DRV_LOG(WARNING, "Default miss action is not supported.");
+	LIST_INIT(&sh->shared_rxqs);
 	return 0;
 error:
 	/* Rollback the created objects. */
@@ -494,6 +495,7 @@  mlx5_os_free_shared_dr(struct mlx5_priv *priv)
 	MLX5_ASSERT(sh && sh->refcnt);
 	if (sh->refcnt > 1)
 		return;
+	MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs));
 #ifdef HAVE_MLX5DV_DR
 	if (sh->rx_domain) {
 		mlx5_glue->dr_destroy_domain(sh->rx_domain);
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index f78916c868f..9d299542614 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -424,14 +424,16 @@  mlx5_rxq_ibv_obj_release(struct mlx5_rxq_priv *rxq)
 {
 	struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
 
-	MLX5_ASSERT(rxq_obj);
-	MLX5_ASSERT(rxq_obj->wq);
-	MLX5_ASSERT(rxq_obj->ibv_cq);
+	if (rxq_obj == NULL || rxq_obj->wq == NULL)
+		return;
 	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
+	rxq_obj->wq = NULL;
+	MLX5_ASSERT(rxq_obj->ibv_cq);
 	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
 	if (rxq_obj->ibv_channel)
 		claim_zero(mlx5_glue->destroy_comp_channel
 							(rxq_obj->ibv_channel));
+	rxq->ctrl->started = false;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a037a33debf..51f45788381 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1200,6 +1200,7 @@  struct mlx5_dev_ctx_shared {
 	struct mlx5_ecpri_parser_profile ecpri_parser;
 	/* Flex parser profiles information. */
 	void *devx_rx_uar; /* DevX UAR for Rx. */
+	LIST_HEAD(shared_rxqs, mlx5_rxq_ctrl) shared_rxqs; /* Shared RXQs. */
 	struct mlx5_aso_age_mng *aso_age_mng;
 	/* Management data for aging mechanism using ASO Flow Hit. */
 	struct mlx5_geneve_tlv_option_resource *geneve_tlv_option_resource;
@@ -1267,6 +1268,7 @@  struct mlx5_rxq_obj {
 		};
 		struct mlx5_devx_obj *rq; /* DevX RQ object for hairpin. */
 		struct {
+			struct mlx5_devx_rmp devx_rmp; /* RMP for shared RQ. */
 			struct mlx5_devx_cq cq_obj; /* DevX CQ object. */
 			void *devx_channel;
 		};
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 668d47025e8..d3d189ab7f2 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -88,6 +88,8 @@  mlx5_devx_modify_rq(struct mlx5_rxq_priv *rxq, uint8_t type)
 	default:
 		break;
 	}
+	if (rxq->ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
+		return mlx5_devx_cmd_modify_rq(rxq->ctrl->obj->rq, &rq_attr);
 	return mlx5_devx_cmd_modify_rq(rxq->devx_rq.rq, &rq_attr);
 }
 
@@ -156,18 +158,21 @@  mlx5_txq_devx_modify(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
 static void
 mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
 {
-	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
-	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
+	struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
 
-	MLX5_ASSERT(rxq != NULL);
-	MLX5_ASSERT(rxq_ctrl != NULL);
+	if (rxq_obj == NULL)
+		return;
 	if (rxq_obj->rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN) {
-		MLX5_ASSERT(rxq_obj->rq);
+		if (rxq_obj->rq == NULL)
+			return;
 		mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RST);
 		claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
 	} else {
+		if (rxq->devx_rq.rq == NULL)
+			return;
 		mlx5_devx_rq_destroy(&rxq->devx_rq);
-		memset(&rxq->devx_rq, 0, sizeof(rxq->devx_rq));
+		if (rxq->devx_rq.rmp != NULL && rxq->devx_rq.rmp->ref_cnt > 0)
+			return;
 		mlx5_devx_cq_destroy(&rxq_obj->cq_obj);
 		memset(&rxq_obj->cq_obj, 0, sizeof(rxq_obj->cq_obj));
 		if (rxq_obj->devx_channel) {
@@ -176,6 +181,7 @@  mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
 			rxq_obj->devx_channel = NULL;
 		}
 	}
+	rxq->ctrl->started = false;
 }
 
 /**
@@ -271,6 +277,8 @@  mlx5_rxq_create_devx_rq_resources(struct mlx5_rxq_priv *rxq)
 						MLX5_WQ_END_PAD_MODE_NONE;
 	rq_attr.wq_attr.pd = cdev->pdn;
 	rq_attr.counter_set_id = priv->counter_set_id;
+	if (rxq_data->shared) /* Create RMP based RQ. */
+		rxq->devx_rq.rmp = &rxq_ctrl->obj->devx_rmp;
 	/* Create RQ using DevX API. */
 	return mlx5_devx_rq_create(cdev->ctx, &rxq->devx_rq, wqe_size,
 				   log_desc_n, &rq_attr, rxq_ctrl->socket);
@@ -300,6 +308,8 @@  mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
 	uint16_t event_nums[1] = { 0 };
 	int ret = 0;
 
+	if (rxq_ctrl->started)
+		return 0;
 	if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
 	    !rxq_data->lro) {
 		cq_attr.cqe_comp_en = 1u;
@@ -365,6 +375,7 @@  mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
 	rxq_data->cq_uar = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
 	rxq_data->cqe_n = log_cqe_n;
 	rxq_data->cqn = cq_obj->cq->id;
+	rxq_data->cq_ci = 0;
 	if (rxq_ctrl->obj->devx_channel) {
 		ret = mlx5_os_devx_subscribe_devx_event
 					      (rxq_ctrl->obj->devx_channel,
@@ -463,7 +474,7 @@  mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
 	if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
 		return mlx5_rxq_obj_hairpin_new(rxq);
 	tmpl->rxq_ctrl = rxq_ctrl;
-	if (rxq_ctrl->irq) {
+	if (rxq_ctrl->irq && !rxq_ctrl->started) {
 		int devx_ev_flag =
 			  MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA;
 
@@ -496,11 +507,19 @@  mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
 	ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RST2RDY);
 	if (ret)
 		goto error;
-	rxq_data->wqes = (void *)(uintptr_t)rxq->devx_rq.wq.umem_buf;
-	rxq_data->rq_db = (uint32_t *)(uintptr_t)rxq->devx_rq.wq.db_rec;
-	mlx5_rxq_initialize(rxq_data);
+	if (!rxq_data->shared) {
+		rxq_data->wqes = (void *)(uintptr_t)rxq->devx_rq.wq.umem_buf;
+		rxq_data->rq_db = (uint32_t *)(uintptr_t)rxq->devx_rq.wq.db_rec;
+	} else if (!rxq_ctrl->started) {
+		rxq_data->wqes = (void *)(uintptr_t)tmpl->devx_rmp.wq.umem_buf;
+		rxq_data->rq_db =
+				(uint32_t *)(uintptr_t)tmpl->devx_rmp.wq.db_rec;
+	}
+	if (!rxq_ctrl->started) {
+		mlx5_rxq_initialize(rxq_data);
+		rxq_ctrl->wqn = rxq->devx_rq.rq->id;
+	}
 	priv->dev_data->rx_queue_state[rxq->idx] = RTE_ETH_QUEUE_STATE_STARTED;
-	rxq_ctrl->wqn = rxq->devx_rq.rq->id;
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
@@ -558,7 +577,10 @@  mlx5_devx_ind_table_create_rqt_attr(struct rte_eth_dev *dev,
 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, queues[i]);
 
 		MLX5_ASSERT(rxq != NULL);
-		rqt_attr->rq_list[i] = rxq->devx_rq.rq->id;
+		if (rxq->ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
+			rqt_attr->rq_list[i] = rxq->ctrl->obj->rq->id;
+		else
+			rqt_attr->rq_list[i] = rxq->devx_rq.rq->id;
 	}
 	MLX5_ASSERT(i > 0);
 	for (j = 0; i != rqt_n; ++j, ++i)
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index bb38d5d2ade..dc647d5580c 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -26,6 +26,7 @@ 
 #include "mlx5_rx.h"
 #include "mlx5_tx.h"
 #include "mlx5_autoconf.h"
+#include "mlx5_devx.h"
 
 /**
  * Get the interface index from device name.
@@ -336,9 +337,13 @@  mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
 	mlx5_set_default_params(dev, info);
 	mlx5_set_txlimit_params(dev, info);
+	if (priv->config.hca_attr.mem_rq_rmp &&
+	    priv->obj_ops.rxq_obj_new == devx_obj_ops.rxq_obj_new)
+		info->dev_capa |= RTE_ETH_DEV_CAPA_RXQ_SHARE;
 	info->switch_info.name = dev->data->name;
 	info->switch_info.domain_id = priv->domain_id;
 	info->switch_info.port_id = priv->representor_id;
+	info->switch_info.rx_domain = 0; /* No sub Rx domains. */
 	if (priv->representor) {
 		uint16_t port_id;
 
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 413e36f6d8d..eda6eca8dea 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -96,6 +96,7 @@  struct mlx5_rxq_data {
 	unsigned int lro:1; /* Enable LRO. */
 	unsigned int dynf_meta:1; /* Dynamic metadata is configured. */
 	unsigned int mcqe_format:3; /* CQE compression format. */
+	unsigned int shared:1; /* Shared RXQ. */
 	volatile uint32_t *rq_db;
 	volatile uint32_t *cq_db;
 	uint16_t port_id;
@@ -158,8 +159,10 @@  struct mlx5_rxq_ctrl {
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
 	enum mlx5_rxq_type type; /* Rxq type. */
 	unsigned int socket; /* CPU socket ID for allocations. */
+	LIST_ENTRY(mlx5_rxq_ctrl) share_entry; /* Entry in shared RXQ list. */
 	uint32_t share_group; /* Group ID of shared RXQ. */
 	uint16_t share_qid; /* Shared RxQ ID in group. */
+	unsigned int started:1; /* Whether (shared) RXQ has been started. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f3fc618ed2c..8feb3e2c0fb 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -29,6 +29,7 @@ 
 #include "mlx5_rx.h"
 #include "mlx5_utils.h"
 #include "mlx5_autoconf.h"
+#include "mlx5_devx.h"
 
 
 /* Default RSS hash key also used for ConnectX-3. */
@@ -633,14 +634,19 @@  mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
  *   RX queue index.
  * @param desc
  *   Number of descriptors to configure in queue.
+ * @param[out] rxq_ctrl
+ *   Address of pointer to shared Rx queue control.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
+mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
+			struct mlx5_rxq_ctrl **rxq_ctrl)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_rxq_priv *rxq;
+	bool empty;
 
 	if (!rte_is_power_of_2(*desc)) {
 		*desc = 1 << log2above(*desc);
@@ -657,16 +663,143 @@  mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
 		rte_errno = EOVERFLOW;
 		return -rte_errno;
 	}
-	if (!mlx5_rxq_releasable(dev, idx)) {
-		DRV_LOG(ERR, "port %u unable to release queue index %u",
-			dev->data->port_id, idx);
-		rte_errno = EBUSY;
-		return -rte_errno;
+	if (rxq_ctrl == NULL || *rxq_ctrl == NULL)
+		return 0;
+	if (!(*rxq_ctrl)->rxq.shared) {
+		if (!mlx5_rxq_releasable(dev, idx)) {
+			DRV_LOG(ERR, "port %u unable to release queue index %u",
+				dev->data->port_id, idx);
+			rte_errno = EBUSY;
+			return -rte_errno;
+		}
+		mlx5_rxq_release(dev, idx);
+	} else {
+		if ((*rxq_ctrl)->obj != NULL)
+			/* Some port using shared Rx queue has been started. */
+			return 0;
+		/* Release all owner RxQ to reconfigure Shared RxQ. */
+		do {
+			rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
+			LIST_REMOVE(rxq, owner_entry);
+			empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
+			mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
+		} while (!empty);
+		*rxq_ctrl = NULL;
 	}
-	mlx5_rxq_release(dev, idx);
 	return 0;
 }
 
+/**
+ * Get the shared Rx queue object that matches group and queue index.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param group
+ *   Shared RXQ group.
+ * @param share_qid
+ *   Shared RX queue index.
+ *
+ * @return
+ *   Shared RXQ object that matching, or NULL if not found.
+ */
+static struct mlx5_rxq_ctrl *
+mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid)
+{
+	struct mlx5_rxq_ctrl *rxq_ctrl;
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
+		if (rxq_ctrl->share_group == group &&
+		    rxq_ctrl->share_qid == share_qid)
+			return rxq_ctrl;
+	}
+	return NULL;
+}
+
+/**
+ * Check whether requested Rx queue configuration matches shared RXQ.
+ *
+ * @param rxq_ctrl
+ *   Pointer to shared RXQ.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   Queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ *   Thresholds parameters.
+ * @param mp
+ *   Memory pool for buffer allocations.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static bool
+mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev,
+		      uint16_t idx, uint16_t desc, unsigned int socket,
+		      const struct rte_eth_rxconf *conf,
+		      struct rte_mempool *mp)
+{
+	struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	unsigned int i;
+
+	RTE_SET_USED(conf);
+	if (rxq_ctrl->socket != socket) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	if (priv->mtu != spriv->mtu) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	if (priv->dev_data->dev_conf.intr_conf.rxq !=
+	    spriv->dev_data->dev_conf.intr_conf.rxq) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	if (mp != NULL && rxq_ctrl->rxq.mp != mp) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch",
+			dev->data->port_id, idx);
+		return false;
+	} else if (mp == NULL) {
+		for (i = 0; i < conf->rx_nseg; i++) {
+			if (conf->rx_seg[i].split.mp !=
+			    rxq_ctrl->rxq.rxseg[i].mp ||
+			    conf->rx_seg[i].split.length !=
+			    rxq_ctrl->rxq.rxseg[i].length) {
+				DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch",
+					dev->data->port_id, idx, i);
+				return false;
+			}
+		}
+	}
+	if (priv->config.hw_padding != spriv->config.hw_padding) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	if (priv->config.cqe_comp != spriv->config.cqe_comp ||
+	    (priv->config.cqe_comp &&
+	     priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) {
+		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE compression mismatch",
+			dev->data->port_id, idx);
+		return false;
+	}
+	return true;
+}
+
 /**
  *
  * @param dev
@@ -692,12 +825,14 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_rxq_priv *rxq;
-	struct mlx5_rxq_ctrl *rxq_ctrl;
+	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
 	struct rte_eth_rxseg_split *rx_seg =
 				(struct rte_eth_rxseg_split *)conf->rx_seg;
 	struct rte_eth_rxseg_split rx_single = {.mp = mp};
 	uint16_t n_seg = conf->rx_nseg;
 	int res;
+	uint64_t offloads = conf->offloads |
+			    dev->data->dev_conf.rxmode.offloads;
 
 	if (mp) {
 		/*
@@ -709,9 +844,6 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		n_seg = 1;
 	}
 	if (n_seg > 1) {
-		uint64_t offloads = conf->offloads |
-				    dev->data->dev_conf.rxmode.offloads;
-
 		/* The offloads should be checked on rte_eth_dev layer. */
 		MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
 		if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
@@ -723,9 +855,46 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		}
 		MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
 	}
-	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
+	if (conf->share_group > 0) {
+		if (!priv->config.hca_attr.mem_rq_rmp) {
+			DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw",
+				     dev->data->port_id, idx);
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
+			DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api",
+				     dev->data->port_id, idx);
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		if (conf->share_qid >= priv->rxqs_n) {
+			DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u",
+				dev->data->port_id, conf->share_qid,
+				priv->rxqs_n);
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		if (priv->config.mprq.enabled) {
+			DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled",
+				dev->data->port_id, conf->share_qid);
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		/* Try to reuse shared RXQ. */
+		rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group,
+					       conf->share_qid);
+		if (rxq_ctrl != NULL &&
+		    !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
+					   conf, mp)) {
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+	}
+	res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
 	if (res)
 		return res;
+	/* Allocate RXQ. */
 	rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
 			  SOCKET_ID_ANY);
 	if (!rxq) {
@@ -737,15 +906,23 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	rxq->priv = priv;
 	rxq->idx = idx;
 	(*priv->rxq_privs)[idx] = rxq;
-	rxq_ctrl = mlx5_rxq_new(dev, rxq, desc, socket, conf, rx_seg, n_seg);
-	if (!rxq_ctrl) {
-		DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
-			dev->data->port_id, idx);
-		mlx5_free(rxq);
-		(*priv->rxq_privs)[idx] = NULL;
-		rte_errno = ENOMEM;
-		return -rte_errno;
+	if (rxq_ctrl != NULL) {
+		/* Join owner list. */
+		LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
+		rxq->ctrl = rxq_ctrl;
+	} else {
+		rxq_ctrl = mlx5_rxq_new(dev, rxq, desc, socket, conf, rx_seg,
+					n_seg);
+		if (rxq_ctrl == NULL) {
+			DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
+				dev->data->port_id, idx);
+			mlx5_free(rxq);
+			(*priv->rxq_privs)[idx] = NULL;
+			rte_errno = ENOMEM;
+			return -rte_errno;
+		}
 	}
+	mlx5_rxq_ref(dev, idx);
 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
 		dev->data->port_id, idx);
 	dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
@@ -776,7 +953,7 @@  mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	int res;
 
-	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
+	res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL);
 	if (res)
 		return res;
 	if (hairpin_conf->peer_count != 1) {
@@ -1095,6 +1272,9 @@  mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
 	struct mlx5_rxq_obj *rxq_obj;
 
 	LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
+		if (rxq_obj->rxq_ctrl->rxq.shared &&
+		    !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners))
+			continue;
 		DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
 			dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
 		++ret;
@@ -1413,6 +1593,12 @@  mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
 		return NULL;
 	}
 	LIST_INIT(&tmpl->owners);
+	if (conf->share_group > 0) {
+		tmpl->rxq.shared = 1;
+		tmpl->share_group = conf->share_group;
+		tmpl->share_qid = conf->share_qid;
+		LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+	}
 	rxq->ctrl = tmpl;
 	LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry);
 	MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
@@ -1661,7 +1847,6 @@  mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
 	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
 #endif
 	tmpl->rxq.idx = idx;
-	mlx5_rxq_ref(dev, idx);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
 error:
@@ -1836,31 +2021,41 @@  mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_rxq_priv *rxq;
 	struct mlx5_rxq_ctrl *rxq_ctrl;
+	uint32_t refcnt;
 
 	if (priv->rxq_privs == NULL)
 		return 0;
 	rxq = mlx5_rxq_get(dev, idx);
-	if (rxq == NULL)
+	if (rxq == NULL || rxq->refcnt == 0)
 		return 0;
-	if (mlx5_rxq_deref(dev, idx) > 1)
-		return 1;
 	rxq_ctrl = rxq->ctrl;
-	if (rxq_ctrl->obj != NULL) {
+	refcnt = mlx5_rxq_deref(dev, idx);
+	if (refcnt > 1) {
+		return 1;
+	} else if (refcnt == 1) { /* RxQ stopped. */
 		priv->obj_ops.rxq_obj_release(rxq);
-		LIST_REMOVE(rxq_ctrl->obj, next);
-		mlx5_free(rxq_ctrl->obj);
-		rxq_ctrl->obj = NULL;
-	}
-	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-		rxq_free_elts(rxq_ctrl);
-		dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
-	}
-	if (!__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED)) {
-		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
-			mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+		if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) {
+			LIST_REMOVE(rxq_ctrl->obj, next);
+			mlx5_free(rxq_ctrl->obj);
+			rxq_ctrl->obj = NULL;
+		}
+		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
+			if (!rxq_ctrl->started)
+				rxq_free_elts(rxq_ctrl);
+			dev->data->rx_queue_state[idx] =
+					RTE_ETH_QUEUE_STATE_STOPPED;
+		}
+	} else { /* Refcnt zero, closing device. */
 		LIST_REMOVE(rxq, owner_entry);
-		LIST_REMOVE(rxq_ctrl, next);
-		mlx5_free(rxq_ctrl);
+		if (LIST_EMPTY(&rxq_ctrl->owners)) {
+			if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
+				mlx5_mr_btree_free
+					(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+			if (rxq_ctrl->rxq.shared)
+				LIST_REMOVE(rxq_ctrl, share_entry);
+			LIST_REMOVE(rxq_ctrl, next);
+			mlx5_free(rxq_ctrl);
+		}
 		dev->data->rx_queues[idx] = NULL;
 		mlx5_free(rxq);
 		(*priv->rxq_privs)[idx] = NULL;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 72475e4b5b5..a3e62e95335 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -176,6 +176,39 @@  mlx5_rxq_stop(struct rte_eth_dev *dev)
 		mlx5_rxq_release(dev, i);
 }
 
+static int
+mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
+		      unsigned int idx)
+{
+	int ret = 0;
+
+	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
+		/*
+		 * Pre-register the mempools. Regardless of whether
+		 * the implicit registration is enabled or not,
+		 * Rx mempool destruction is tracked to free MRs.
+		 */
+		if (mlx5_rxq_mempool_register(dev, rxq_ctrl) < 0)
+			return -rte_errno;
+		ret = rxq_alloc_elts(rxq_ctrl);
+		if (ret)
+			return ret;
+	}
+	MLX5_ASSERT(!rxq_ctrl->obj);
+	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+				    sizeof(*rxq_ctrl->obj), 0,
+				    rxq_ctrl->socket);
+	if (!rxq_ctrl->obj) {
+		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
+			dev->data->port_id, idx);
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
+		idx, (void *)&rxq_ctrl->obj);
+	return 0;
+}
+
 /**
  * Start traffic on Rx queues.
  *
@@ -208,28 +241,10 @@  mlx5_rxq_start(struct rte_eth_dev *dev)
 		if (rxq == NULL)
 			continue;
 		rxq_ctrl = rxq->ctrl;
-		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-			/*
-			 * Pre-register the mempools. Regardless of whether
-			 * the implicit registration is enabled or not,
-			 * Rx mempool destruction is tracked to free MRs.
-			 */
-			if (mlx5_rxq_mempool_register(dev, rxq_ctrl) < 0)
-				goto error;
-			ret = rxq_alloc_elts(rxq_ctrl);
-			if (ret)
+		if (!rxq_ctrl->started) {
+			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
 				goto error;
-		}
-		MLX5_ASSERT(!rxq_ctrl->obj);
-		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
-					    sizeof(*rxq_ctrl->obj), 0,
-					    rxq_ctrl->socket);
-		if (!rxq_ctrl->obj) {
-			DRV_LOG(ERR,
-				"Port %u Rx queue %u can't allocate resources.",
-				dev->data->port_id, i);
-			rte_errno = ENOMEM;
-			goto error;
+			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
 		}
 		ret = priv->obj_ops.rxq_obj_new(rxq);
 		if (ret) {
@@ -237,9 +252,7 @@  mlx5_rxq_start(struct rte_eth_dev *dev)
 			rxq_ctrl->obj = NULL;
 			goto error;
 		}
-		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
-			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
-		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
+		rxq_ctrl->started = true;
 	}
 	return 0;
 error: