[2/3] common/mlx5: fix redundant parameter in search MR function

Message ID 20211103101707.1418097-3-michaelba@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series mlx5: fix performance degradation |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Nov. 3, 2021, 10:17 a.m. UTC
  From: Michael Baum <michaelba@oss.nvidia.com>

Memory region management has recently been shared between drivers,
including the cache lookups performed in the data plane.
The initial search in the local linear cache of the queue usually
yields a result, in which case there is no need to continue searching
in the next-level caches.

The function that searches in the local cache takes a pointer to the
device as a parameter. This pointer is not needed for the local search
itself, only for the subsequent searches (which, as mentioned, usually
do not happen).
Passing the device pointer to the function and maintaining it takes
some time and has some impact on performance.

Add the pointer to the device as a field of the mr_ctrl structure. The
field is set in the control path and is read only when the search has
to go beyond the local cache.

Fixes: fc59a1ec556b ("common/mlx5: share MR mempool registration")

Signed-off-by: Michael Baum <michaelba@oss.nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Reviewed-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/common/mlx5/mlx5_common_mr.c     | 14 +++++++-----
 drivers/common/mlx5/mlx5_common_mr.h     | 28 ++++++++++-------------
 drivers/compress/mlx5/mlx5_compress.c    |  4 ++--
 drivers/crypto/mlx5/mlx5_crypto.c        | 24 +++++++++-----------
 drivers/net/mlx5/mlx5_rx.h               | 10 ++------
 drivers/net/mlx5/mlx5_rxq.c              |  3 +--
 drivers/net/mlx5/mlx5_tx.h               |  3 +--
 drivers/net/mlx5/mlx5_txq.c              |  3 +--
 drivers/regex/mlx5/mlx5_regex_control.c  |  3 +--
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 29 ++++--------------------
 10 files changed, 43 insertions(+), 78 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 903ed0652c..003d358f96 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -292,8 +292,8 @@  mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
  *
  * @param mr_ctrl
  *   Pointer to MR control structure.
- * @param dev_gen_ptr
- *   Pointer to generation number of global cache.
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
  * @param socket
  *   NUMA socket on which memory must be allocated.
  *
@@ -301,15 +301,16 @@  mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
+mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, struct mlx5_common_device *cdev,
 		  int socket)
 {
 	if (mr_ctrl == NULL) {
 		rte_errno = EINVAL;
 		return -rte_errno;
 	}
+	mr_ctrl->cdev = cdev;
 	/* Save pointer of global generation number to check memory event. */
-	mr_ctrl->dev_gen_ptr = dev_gen_ptr;
+	mr_ctrl->dev_gen_ptr = &cdev->mr_scache.dev_gen;
 	/* Initialize B-tree and allocate memory for bottom-half cache table. */
 	return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N,
 				  socket);
@@ -1860,11 +1861,12 @@  mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
 }
 
 uint32_t
-mlx5_mr_mb2mr_bh(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
-		 struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
+mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb,
+		 struct mlx5_mp_id *mp_id)
 {
 	uint32_t lkey;
 	uintptr_t addr = (uintptr_t)mb->buf_addr;
+	struct mlx5_common_device *cdev = mr_ctrl->cdev;
 
 	if (cdev->config.mr_mempool_reg_en) {
 		struct rte_mempool *mp = NULL;
diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h
index 8771c7d02b..f65974b8a9 100644
--- a/drivers/common/mlx5/mlx5_common_mr.h
+++ b/drivers/common/mlx5/mlx5_common_mr.h
@@ -66,6 +66,7 @@  struct mlx5_common_device;
 
 /* Per-queue MR control descriptor. */
 struct mlx5_mr_ctrl {
+	struct mlx5_common_device *cdev; /* Pointer to the mlx5 common device.*/
 	uint32_t *dev_gen_ptr; /* Generation number of device to poll. */
 	uint32_t cur_gen; /* Generation number saved to flush caches. */
 	uint16_t mru; /* Index of last hit entry in top-half cache. */
@@ -169,41 +170,36 @@  void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
  * Bottom-half of LKey search on. If supported, lookup for the address from
  * the mempool. Otherwise, search in old mechanism caches.
  *
- * @param cdev
- *   Pointer to mlx5 device.
- * @param mp_id
- *   Multi-process identifier, may be NULL for the primary process.
  * @param mr_ctrl
  *   Pointer to per-queue MR control structure.
  * @param mb
  *   Pointer to mbuf.
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  *
  * @return
  *   Searched LKey on success, UINT32_MAX on no match.
  */
 __rte_internal
-uint32_t mlx5_mr_mb2mr_bh(struct mlx5_common_device *cdev,
-			  struct mlx5_mp_id *mp_id,
-			  struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb);
+uint32_t mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
+			  struct mlx5_mp_id *mp_id);
 
 /**
  * Query LKey from a packet buffer.
  *
- * @param cdev
- *   Pointer to the mlx5 device structure.
- * @param mp_id
- *   Multi-process identifier, may be NULL for the primary process.
  * @param mr_ctrl
  *   Pointer to per-queue MR control structure.
  * @param mbuf
  *   Pointer to mbuf.
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  *
  * @return
  *   Searched LKey on success, UINT32_MAX on no match.
  */
 static __rte_always_inline uint32_t
-mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
-	      struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf)
+mlx5_mr_mb2mr(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
+	      struct mlx5_mp_id *mp_id)
 {
 	uint32_t lkey;
 
@@ -216,14 +212,14 @@  mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
 	if (likely(lkey != UINT32_MAX))
 		return lkey;
 	/* Take slower bottom-half on miss. */
-	return mlx5_mr_mb2mr_bh(cdev, mp_id, mr_ctrl, mbuf);
+	return mlx5_mr_mb2mr_bh(mr_ctrl, mbuf, mp_id);
 }
 
 /* mlx5_common_mr.c */
 
 __rte_internal
-int mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
-		      int socket);
+int mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl,
+		      struct mlx5_common_device *cdev, int socket);
 __rte_internal
 void mlx5_mr_btree_free(struct mlx5_mr_btree *bt);
 void mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused);
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index c4081c5f7d..5cf6d647af 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -205,7 +205,7 @@  mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
 		return -rte_errno;
 	}
 	dev->data->queue_pairs[qp_id] = qp;
-	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
+	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev,
 			      priv->dev_config.socket_id)) {
 		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
 			(uint32_t)qp_id);
@@ -471,7 +471,7 @@  mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
 	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
 
 	dseg->bcount = rte_cpu_to_be_32(len);
-	dseg->lkey = mlx5_mr_mb2mr(qp->priv->cdev, 0, &qp->mr_ctrl, mbuf);
+	dseg->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
 	dseg->pbuf = rte_cpu_to_be_64(addr);
 	return dseg->lkey;
 }
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index f430d8cde0..1740dba003 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -305,9 +305,9 @@  mlx5_crypto_get_block_size(struct rte_crypto_op *op)
 }
 
 static __rte_always_inline uint32_t
-mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
-		      struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm,
-		      uint32_t offset, uint32_t *remain)
+mlx5_crypto_klm_set(struct mlx5_crypto_qp *qp, struct rte_mbuf *mbuf,
+		    struct mlx5_wqe_dseg *klm, uint32_t offset,
+		    uint32_t *remain)
 {
 	uint32_t data_len = (rte_pktmbuf_data_len(mbuf) - offset);
 	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
@@ -317,22 +317,21 @@  mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
 	*remain -= data_len;
 	klm->bcount = rte_cpu_to_be_32(data_len);
 	klm->pbuf = rte_cpu_to_be_64(addr);
-	klm->lkey = mlx5_mr_mb2mr(priv->cdev, 0, &qp->mr_ctrl, mbuf);
+	klm->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
 	return klm->lkey;
 
 }
 
 static __rte_always_inline uint32_t
-mlx5_crypto_klms_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
-		     struct rte_crypto_op *op, struct rte_mbuf *mbuf,
-		     struct mlx5_wqe_dseg *klm)
+mlx5_crypto_klms_set(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op,
+		     struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm)
 {
 	uint32_t remain_len = op->sym->cipher.data.length;
 	uint32_t nb_segs = mbuf->nb_segs;
 	uint32_t klm_n = 1u;
 
 	/* First mbuf needs to take the cipher offset. */
-	if (unlikely(mlx5_crypto_klm_set(priv, qp, mbuf, klm,
+	if (unlikely(mlx5_crypto_klm_set(qp, mbuf, klm,
 		     op->sym->cipher.data.offset, &remain_len) == UINT32_MAX)) {
 		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
 		return 0;
@@ -344,7 +343,7 @@  mlx5_crypto_klms_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
 			op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
 			return 0;
 		}
-		if (unlikely(mlx5_crypto_klm_set(priv, qp, mbuf, ++klm, 0,
+		if (unlikely(mlx5_crypto_klm_set(qp, mbuf, ++klm, 0,
 						 &remain_len) == UINT32_MAX)) {
 			op->status = RTE_CRYPTO_OP_STATUS_ERROR;
 			return 0;
@@ -370,7 +369,7 @@  mlx5_crypto_wqe_set(struct mlx5_crypto_priv *priv,
 	uint32_t ds;
 	bool ipl = op->sym->m_dst == NULL || op->sym->m_dst == op->sym->m_src;
 	/* Set UMR WQE. */
-	uint32_t klm_n = mlx5_crypto_klms_set(priv, qp, op,
+	uint32_t klm_n = mlx5_crypto_klms_set(qp, op,
 				   ipl ? op->sym->m_src : op->sym->m_dst, klms);
 
 	if (unlikely(klm_n == 0))
@@ -396,8 +395,7 @@  mlx5_crypto_wqe_set(struct mlx5_crypto_priv *priv,
 	cseg = RTE_PTR_ADD(cseg, priv->umr_wqe_size);
 	klms = RTE_PTR_ADD(cseg, sizeof(struct mlx5_rdma_write_wqe));
 	if (!ipl) {
-		klm_n = mlx5_crypto_klms_set(priv, qp, op, op->sym->m_src,
-					     klms);
+		klm_n = mlx5_crypto_klms_set(qp, op, op->sym->m_src, klms);
 		if (unlikely(klm_n == 0))
 			return 0;
 	} else {
@@ -643,7 +641,7 @@  mlx5_crypto_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 		DRV_LOG(ERR, "Failed to create QP.");
 		goto error;
 	}
-	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
+	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev,
 			      priv->dev_config.socket_id) != 0) {
 		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
 			(uint32_t)qp_id);
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4952fe1455..322f234628 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -282,7 +282,6 @@  static __rte_always_inline uint32_t
 mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
 {
 	struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
-	struct mlx5_rxq_ctrl *rxq_ctrl;
 	struct rte_mempool *mp;
 	uint32_t lkey;
 
@@ -291,14 +290,9 @@  mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
 				   MLX5_MR_CACHE_N, addr);
 	if (likely(lkey != UINT32_MAX))
 		return lkey;
-	/*
-	 * Slower search in the mempool database on miss.
-	 * During queue creation rxq->sh is not yet set, so we use rxq_ctrl.
-	 */
-	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	mp = mlx5_rxq_mprq_enabled(rxq) ? rxq->mprq_mp : rxq->mp;
-	return mlx5_mr_mempool2mr_bh(&rxq_ctrl->priv->sh->cdev->mr_scache,
-				     mr_ctrl, mp, addr);
+	return mlx5_mr_mempool2mr_bh(&mr_ctrl->cdev->mr_scache, mr_ctrl,
+				     mp, addr);
 }
 
 #define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 4f02fe02b9..1fc2f0e0c1 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1455,8 +1455,7 @@  mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		goto error;
 	}
 	tmpl->type = MLX5_RXQ_TYPE_STANDARD;
-	if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
-			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
+	if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl, priv->sh->cdev, socket)) {
 		/* rte_errno is already set. */
 		goto error;
 	}
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index ea20213a40..7fed0e7cb9 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -368,10 +368,9 @@  mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
 	struct mlx5_txq_ctrl *txq_ctrl =
 			container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct mlx5_priv *priv = txq_ctrl->priv;
 
 	/* Take slower bottom-half on miss. */
-	return mlx5_mr_mb2mr(priv->sh->cdev, &priv->mp_id, mr_ctrl, mb);
+	return mlx5_mr_mb2mr(mr_ctrl, mb, &txq_ctrl->priv->mp_id);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index e2a38d980a..e9ab7fa266 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1134,8 +1134,7 @@  mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		rte_errno = ENOMEM;
 		return NULL;
 	}
-	if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
-			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
+	if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl, priv->sh->cdev, socket)) {
 		/* rte_errno is already set. */
 		goto error;
 	}
diff --git a/drivers/regex/mlx5/mlx5_regex_control.c b/drivers/regex/mlx5/mlx5_regex_control.c
index 50c966a022..e40b1f20d9 100644
--- a/drivers/regex/mlx5/mlx5_regex_control.c
+++ b/drivers/regex/mlx5/mlx5_regex_control.c
@@ -242,8 +242,7 @@  mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind,
 		nb_sq_config++;
 	}
 
-	ret = mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
-				rte_socket_id());
+	ret = mlx5_mr_ctrl_init(&qp->mr_ctrl, priv->cdev, rte_socket_id());
 	if (ret) {
 		DRV_LOG(ERR, "Error setting up mr btree");
 		goto err_btree;
diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c
index adb5343a46..943cb9c19e 100644
--- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
+++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
@@ -109,26 +109,6 @@  set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
 	seg->imm = imm;
 }
 
-/**
- * Query LKey from a packet buffer for QP. If not found, add the mempool.
- *
- * @param priv
- *   Pointer to the priv object.
- * @param mr_ctrl
- *   Pointer to per-queue MR control structure.
- * @param mbuf
- *   Pointer to source mbuf, to search in.
- *
- * @return
- *   Searched LKey on success, UINT32_MAX on no match.
- */
-static inline uint32_t
-mlx5_regex_mb2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
-		 struct rte_mbuf *mbuf)
-{
-	return mlx5_mr_mb2mr(priv->cdev, 0, mr_ctrl, mbuf);
-}
-
 static inline void
 __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj,
 	   struct rte_regex_ops *op, struct mlx5_regex_job *job,
@@ -180,7 +160,7 @@  prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 	struct mlx5_klm klm;
 
 	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
-	klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, op->mbuf);
+	klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->mbuf, 0);
 	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
 	__prep_one(priv, qp_obj, op, job, qp_obj->pi, &klm);
 	qp_obj->db_pi = qp_obj->pi;
@@ -349,9 +329,8 @@  prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 			while (mbuf) {
 				addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
 				/* Build indirect mkey seg's KLM. */
-				mkey_klm->mkey = mlx5_regex_mb2mr(priv,
-								  &qp->mr_ctrl,
-								  mbuf);
+				mkey_klm->mkey = mlx5_mr_mb2mr(&qp->mr_ctrl,
+							       mbuf, 0);
 				mkey_klm->address = rte_cpu_to_be_64(addr);
 				mkey_klm->byte_count = rte_cpu_to_be_32
 						(rte_pktmbuf_data_len(mbuf));
@@ -368,7 +347,7 @@  prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 			klm.byte_count = scatter_size;
 		} else {
 			/* The single mubf case. Build the KLM directly. */
-			klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, mbuf);
+			klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf, 0);
 			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
 			klm.byte_count = rte_pktmbuf_data_len(mbuf);
 		}