diff mbox

[dpdk-dev,3/4] net/mlx5: support mempool of multi-mem segments

Message ID 20180115065420.44065-3-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Headers show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Xueming Li Jan. 15, 2018, 6:54 a.m. UTC
Previously, mempool of multiple memory segments would lead to MR
registration error "mempool not virtually contiguous". This patch
support multi-segments mempool by registering each memory segment of
mempool.

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5.h         |   4 +-
 drivers/net/mlx5/mlx5_mr.c      | 284 ++++++++++++++--------------------------
 drivers/net/mlx5/mlx5_rxq.c     |  29 ++--
 drivers/net/mlx5/mlx5_rxtx.h    |  48 ++++---
 drivers/net/mlx5/mlx5_trigger.c |   7 -
 5 files changed, 143 insertions(+), 229 deletions(-)

Comments

Nélio Laranjeiro Jan. 15, 2018, 4:29 p.m. UTC | #1
Hi Xueming,

On Mon, Jan 15, 2018 at 02:54:19PM +0800, Xueming Li wrote:
> Previously, mempool of multiple memory segments would lead to MR
> registration error "mempool not virtually contiguous". This patch
> support multi-segments mempool by registering each memory segment of
> mempool.
> 
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.h         |   4 +-
>  drivers/net/mlx5/mlx5_mr.c      | 284 ++++++++++++++--------------------------
>  drivers/net/mlx5/mlx5_rxq.c     |  29 ++--
>  drivers/net/mlx5/mlx5_rxtx.h    |  48 ++++---
>  drivers/net/mlx5/mlx5_trigger.c |   7 -
>  5 files changed, 143 insertions(+), 229 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 8ee522069..a6cd1474c 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -316,9 +316,11 @@ int priv_socket_connect(struct priv *priv);
>  
>  /* mlx5_mr.c */
>  
> -struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
> +struct mlx5_mr *priv_mr_new(struct priv *priv, struct rte_mempool *mp,
> +			    uintptr_t addr);
>  struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
>  int priv_mr_release(struct priv *, struct mlx5_mr *);
>  int priv_mr_verify(struct priv *);
> +struct mlx5_mr *priv_mr_lookup(struct priv *priv, uintptr_t addr);
>  
>  #endif /* RTE_PMD_MLX5_H_ */
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index 3c80fbb89..8d8fabea1 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -47,158 +47,89 @@
>  #include "mlx5.h"
>  #include "mlx5_rxtx.h"
>  
> -struct mlx5_check_mempool_data {
> -	int ret;
> -	char *start;
> -	char *end;
> +struct mlx5_mempool_cb_data {
> +	uintptr_t addr;
> +	struct priv *priv;
> +	int error;
> +	struct mlx5_mr *mr;
>  };
>  
> -/* Called by mlx5_check_mempool() when iterating the memory chunks. */
> +/* Called by priv_mr_new() when iterating the memory chunks. */
>  static void
> -mlx5_check_mempool_cb(struct rte_mempool *mp,
> -		      void *opaque, struct rte_mempool_memhdr *memhdr,
> -		      unsigned int mem_idx)
> +mlx5_mempool_register_cb(struct rte_mempool *mp, void *opaque,
> +			 struct rte_mempool_memhdr *memhdr,
> +			 unsigned int mem_idx __rte_unused)
>  {
> -	struct mlx5_check_mempool_data *data = opaque;
> -
> -	(void)mp;
> -	(void)mem_idx;
> +	struct mlx5_mempool_cb_data *cb = opaque;
> +	struct priv *priv = cb->priv;
> +	struct mlx5_mr *mr;
>  
> -	/* It already failed, skip the next chunks. */
> -	if (data->ret != 0)
> +	if (cb->error) /* skip on previous error */
>  		return;
> -	/* It is the first chunk. */
> -	if (data->start == NULL && data->end == NULL) {
> -		data->start = memhdr->addr;
> -		data->end = data->start + memhdr->len;
> +	if (cb->addr && (cb->addr < (uintptr_t)memhdr->addr ||
> +			 cb->addr > (uintptr_t)memhdr->addr + memhdr->len))
> +		return; /* not target mem segment */
> +	mr = priv_mr_lookup(priv, (uintptr_t)memhdr->addr);
> +	if (mr) { /* memory already registered */
> +		cb->mr = mr;
>  		return;
>  	}
> -	if (data->end == memhdr->addr) {
> -		data->end += memhdr->len;
> +	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
> +	if (!mr) {
> +		DEBUG("unable to allocate MR, pool %p:%u register failed.",
> +		      (void *)mp, mem_idx);
> +		cb->error = -ENOMEM;
>  		return;
>  	}
> -	if (data->start == (char *)memhdr->addr + memhdr->len) {
> -		data->start -= memhdr->len;
> +	mr->mr = ibv_reg_mr(priv->pd, memhdr->addr, memhdr->len,
> +			    IBV_ACCESS_LOCAL_WRITE);
> +	if (!mr->mr) {
> +		ERROR("unable to register MR, ibv_reg_mr() failed.");
> +		cb->error = -1;
>  		return;
>  	}
> -	/* Error, mempool is not virtually contiguous. */
> -	data->ret = -1;
> -}
> -
> -/**
> - * Check if a mempool can be used: it must be virtually contiguous.
> - *
> - * @param[in] mp
> - *   Pointer to memory pool.
> - * @param[out] start
> - *   Pointer to the start address of the mempool virtual memory area
> - * @param[out] end
> - *   Pointer to the end address of the mempool virtual memory area
> - *
> - * @return
> - *   0 on success (mempool is virtually contiguous), -1 on error.
> - */
> -static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
> -	uintptr_t *end)
> -{
> -	struct mlx5_check_mempool_data data;
> -
> -	memset(&data, 0, sizeof(data));
> -	rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data);
> -	*start = (uintptr_t)data.start;
> -	*end = (uintptr_t)data.end;
> -
> -	return data.ret;
> +	mr->mp = mp;
> +	DEBUG("mempool %p:%u area start=%p size=%zu lkey=0x%08x",
> +	      (void *)mp, mem_idx, memhdr->addr, memhdr->len, mr->mr->lkey);
> +	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
> +	mr->start = (uintptr_t)memhdr->addr;
> +	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
> +	rte_atomic32_inc(&mr->refcnt);
> +	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
> +	      (void *)mr, rte_atomic32_read(&mr->refcnt));
> +	LIST_INSERT_HEAD(&priv->mr, mr, next);
> +	cb->mr = mr;
>  }
>  
>  /**
> - * Register a Memory Region (MR) <-> Memory Pool (MP) association in
> - * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
> - *
> - * This function should only be called by txq_mp2mr().
> + * Lookup or register a Memory Region (MR).
>   *
>   * @param priv
> - *   Pointer to private structure.
> - * @param txq
> - *   Pointer to TX queue structure.
> - * @param[in] mp
> - *   Memory Pool for which a Memory Region lkey must be returned.
> - * @param idx
> - *   Index of the next available entry.
> + *   Pointer to priv structure.
> + * @param mp
> + *   Pointer to the memory pool to register.
> + * @param addr
> + *   Address to register
>   *
>   * @return
>   *   mr on success, NULL on failure.
>   */
>  struct mlx5_mr*
> -priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
> -		   struct rte_mempool *mp, unsigned int idx)
> +mlx5_mp2mr(struct priv *priv, struct rte_mempool *mp, uintptr_t addr)
>  {
> -	struct mlx5_txq_ctrl *txq_ctrl =
> -		container_of(txq, struct mlx5_txq_ctrl, txq);
>  	struct mlx5_mr *mr;
>  
> -	/* Add a new entry, register MR first. */
> -	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
> -	      (void *)txq_ctrl, mp->name, (void *)mp);
> -	mr = priv_mr_get(priv, mp);
> -	if (mr == NULL)
> -		mr = priv_mr_new(priv, mp);
> -	if (unlikely(mr == NULL)) {
> -		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
> -		      (void *)txq_ctrl);
> -		return NULL;
> -	}
> -	if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
> -		/* Table is full, remove oldest entry. */
> -		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
> -		      (void *)txq_ctrl);
> -		--idx;
> -		priv_mr_release(priv, txq->mp2mr[0]);
> -		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
> -			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
> +	priv_lock(priv);
> +	mr = priv_mr_lookup(priv, addr);
> +	if (!mr) {
> +		mr = priv_mr_new(priv, mp, addr);
> +		if (mr)
> +			rte_atomic32_inc(&mr->refcnt);
>  	}
> -	/* Store the new entry. */
> -	txq_ctrl->txq.mp2mr[idx] = mr;
> -	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
> -	      (void *)txq_ctrl, mp->name, (void *)mp,
> -	      txq_ctrl->txq.mp2mr[idx]->lkey);
> -	return mr;
> -}
> -
> -/**
> - * Register a Memory Region (MR) <-> Memory Pool (MP) association in
> - * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
> - *
> - * This function should only be called by txq_mp2mr().
> - *
> - * @param txq
> - *   Pointer to TX queue structure.
> - * @param[in] mp
> - *   Memory Pool for which a Memory Region lkey must be returned.
> - * @param idx
> - *   Index of the next available entry.
> - *
> - * @return
> - *   mr on success, NULL on failure.
> - */
> -struct mlx5_mr*
> -mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
> -		   unsigned int idx)
> -{
> -	struct mlx5_txq_ctrl *txq_ctrl =
> -		container_of(txq, struct mlx5_txq_ctrl, txq);
> -	struct mlx5_mr *mr;
> -
> -	priv_lock(txq_ctrl->priv);
> -	mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
> -	priv_unlock(txq_ctrl->priv);
> +	priv_unlock(priv);
>  	return mr;
>  }
>  
> -struct mlx5_mp2mr_mbuf_check_data {
> -	int ret;
> -};
> -
>  /**
>   * Callback function for rte_mempool_obj_iter() to check whether a given
>   * mempool object looks like a mbuf.
> @@ -214,10 +145,10 @@ struct mlx5_mp2mr_mbuf_check_data {
>   *   Object index, unused.
>   */
>  static void
> -txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
> +mp2mr_mbuf_check_cb(struct rte_mempool *mp, void *arg, void *obj,
>  	uint32_t index __rte_unused)
>  {
> -	struct mlx5_mp2mr_mbuf_check_data *data = arg;
> +	struct mlx5_mempool_cb_data *data = arg;
>  	struct rte_mbuf *buf = obj;
>  
>  	/*
> @@ -225,7 +156,7 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
>  	 * pointer is valid.
>  	 */
>  	if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
> -		data->ret = -1;
> +		data->error = -1;
>  }
>  
>  /**
> @@ -241,21 +172,14 @@ void
>  mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
>  {
>  	struct priv *priv = (struct priv *)arg;
> -	struct mlx5_mp2mr_mbuf_check_data data = {
> -		.ret = 0,
> -	};
> -	struct mlx5_mr *mr;
> +	struct mlx5_mempool_cb_data data = {0};
>  
> -	/* Register mempool only if the first element looks like a mbuf. */
> -	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
> -			data.ret == -1)
> +	/* Register mempool only if all element looks like a mbuf. */
> +	/* TODO less efficient to iterate all objects in pool */
> +	if (rte_mempool_obj_iter(mp, mp2mr_mbuf_check_cb, &data) == 0 ||
> +			data.error == -1)
>  		return;

What are the performance impact of such lookup with several chunks in
the datapath?

> -	mr = priv_mr_get(priv, mp);
> -	if (mr) {
> -		priv_mr_release(priv, mr);
> -		return;
> -	}
> -	priv_mr_new(priv, mp);
> +	priv_mr_new(priv, mp, 0);
>  }
>  
>  /**
> @@ -266,59 +190,30 @@ mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
>   *   Pointer to private structure.
>   * @param mp
>   *   Pointer to the memory pool to register.
> + * @param addr
> + *   Address to register
>   * @return
> - *   The memory region on success.
> + *   Registered MR or the last if multiple MR registered.
>   */
> -struct mlx5_mr*
> -priv_mr_new(struct priv *priv, struct rte_mempool *mp)
> +struct mlx5_mr *
> +priv_mr_new(struct priv *priv, struct rte_mempool *mp, uintptr_t addr)
>  {
> -	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> -	uintptr_t start;
> -	uintptr_t end;
> -	unsigned int i;
> -	struct mlx5_mr *mr;
> +	struct mlx5_mempool_cb_data cb_data = {
> +		.priv = priv,
> +		.addr = addr,
> +	};
>  
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		rte_panic("Please init mempool before rte_eth_dev_start() in primary process: %s",
>  			  mp->name);
> -	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
> -	if (!mr) {
> -		DEBUG("unable to configure MR, ibv_reg_mr() failed.");
> -		return NULL;
> -	}
> -	if (mlx5_check_mempool(mp, &start, &end) != 0) {
> -		ERROR("mempool %p: not virtually contiguous",
> -		      (void *)mp);
> +	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
> +	      (void *)priv, mp->name, (void *)mp);
> +	rte_mempool_mem_iter(mp, mlx5_mempool_register_cb, &cb_data);
> +	if (cb_data.error) {
> +		DEBUG("%p: unable to configure MR.", (void *)priv);
>  		return NULL;
>  	}
> -	DEBUG("mempool %p area start=%p end=%p size=%zu",
> -	      (void *)mp, (void *)start, (void *)end,
> -	      (size_t)(end - start));
> -	/* Round start and end to page boundary if found in memory segments. */
> -	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
> -		uintptr_t addr = (uintptr_t)ms[i].addr;
> -		size_t len = ms[i].len;
> -		unsigned int align = ms[i].hugepage_sz;
> -
> -		if ((start > addr) && (start < addr + len))
> -			start = RTE_ALIGN_FLOOR(start, align);
> -		if ((end > addr) && (end < addr + len))
> -			end = RTE_ALIGN_CEIL(end, align);
> -	}
> -	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
> -	      (void *)mp, (void *)start, (void *)end,
> -	      (size_t)(end - start));
> -	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
> -			    IBV_ACCESS_LOCAL_WRITE);
> -	mr->mp = mp;
> -	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
> -	mr->start = start;
> -	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
> -	rte_atomic32_inc(&mr->refcnt);
> -	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
> -	      (void *)mr, rte_atomic32_read(&mr->refcnt));
> -	LIST_INSERT_HEAD(&priv->mr, mr, next);
> -	return mr;
> +	return cb_data.mr;
>  }
>  
>  /**
> @@ -396,3 +291,26 @@ priv_mr_verify(struct priv *priv)
>  	}
>  	return ret;
>  }
> +
> +/**
> + * Memory Region (MR) lookup by address
> + *
> + * @param priv
> + *   Pointer to priv structure.
> + * @param[in] addr
> + *   Address to lookup.
> + * @return
> + *   mr on success, (uint32_t)-1 on failure.
> + */
> +struct mlx5_mr *
> +priv_mr_lookup(struct priv *priv, uintptr_t addr)
> +{
> +	struct mlx5_mr *mr;
> +
> +	LIST_FOREACH(mr, &priv->mr, next) {
> +		if (addr >= mr->start && addr <= mr->end)
> +			return mr;
> +	}
> +	return NULL;
> +}
> +
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 950472754..a581a2d61 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -652,6 +652,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
>  	int ret = 0;
>  	struct mlx5dv_obj obj;
>  	struct mlx5_dev_config *config = &priv->config;
> +	struct mlx5_mr *last_mr = NULL;
>  
>  	assert(rxq_data);
>  	assert(!rxq_ctrl->ibv);
> @@ -664,13 +665,9 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
>  	}
>  	tmpl->rxq_ctrl = rxq_ctrl;
>  	/* Use the entire RX mempool as the memory region. */
> -	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
> -	if (!tmpl->mr) {
> -		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
> -		if (!tmpl->mr) {
> -			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
> -			goto error;
> -		}
> +	if (!priv_mr_new(priv, rxq_data->mp, 0)) {
> +		ERROR("%p: MR creation failure", (void *)rxq_ctrl);
> +		goto error;
>  	}
>  	if (rxq_ctrl->irq) {
>  		tmpl->channel = ibv_create_comp_channel(priv->ctx);
> @@ -786,14 +783,17 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
>  	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
>  		struct rte_mbuf *buf = (*rxq_data->elts)[i];
>  		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
> +		uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
>  
> +		if (!last_mr || addr < last_mr->start || addr > last_mr->end)
> +			last_mr = priv_mr_lookup(priv, addr);
> +		assert(last_mr);
>  		/* scat->addr must be able to store a pointer. */
>  		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
>  		*scat = (struct mlx5_wqe_data_seg){
> -			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
> -								  uintptr_t)),
> +			.addr = rte_cpu_to_be_64(addr),
>  			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
> -			.lkey = tmpl->mr->lkey,
> +			.lkey = last_mr->lkey,
>  		};
>  	}
>  	rxq_data->rq_db = rwq.dbrec;
> @@ -826,8 +826,6 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
>  		claim_zero(ibv_destroy_cq(tmpl->cq));
>  	if (tmpl->channel)
>  		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
> -	if (tmpl->mr)
> -		priv_mr_release(priv, tmpl->mr);
>  	return NULL;
>  }
>  
> @@ -877,17 +875,12 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
>  int
>  mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
>  {
> -	int ret;
> -
>  	assert(rxq_ibv);
>  	assert(rxq_ibv->wq);
>  	assert(rxq_ibv->cq);
> -	assert(rxq_ibv->mr);
> -	ret = priv_mr_release(priv, rxq_ibv->mr);
> -	if (!ret)
> -		rxq_ibv->mr = NULL;
>  	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
>  	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
> +	(void)priv;
>  	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
>  		rxq_free_elts(rxq_ibv->rxq_ctrl);
>  		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 18e1d26f3..6589a662e 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -142,7 +142,6 @@ struct mlx5_rxq_ibv {
>  	struct ibv_cq *cq; /* Completion Queue. */
>  	struct ibv_wq *wq; /* Work Queue. */
>  	struct ibv_comp_channel *channel;
> -	struct mlx5_mr *mr; /* Memory Region (for mp). */
>  };
>  
>  /* RX queue control descriptor. */
> @@ -324,10 +323,8 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
>  /* mlx5_mr.c */
>  
>  void mlx5_mp2mr_iter(struct rte_mempool *, void *);
> -struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
> -				   struct rte_mempool *, unsigned int);
> -struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
> -				   unsigned int);
> +struct mlx5_mr *mlx5_mp2mr(struct priv *priv, struct rte_mempool *mp,
> +			   uintptr_t addr);
>  
>  #ifndef NDEBUG
>  /**
> @@ -523,7 +520,7 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
>   * @return
>   *   Memory pool where data is located for given mbuf.
>   */
> -static struct rte_mempool *
> +static __rte_always_inline struct rte_mempool *
>  mlx5_tx_mb2mp(struct rte_mbuf *buf)
>  {
>  	if (unlikely(RTE_MBUF_INDIRECT(buf)))
> @@ -552,30 +549,41 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
>  	struct mlx5_mr *mr;
>  
>  	assert(i < RTE_DIM(txq->mp2mr));
> -	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
> +	if (likely(txq->mp2mr[i] && txq->mp2mr[i]->start <= addr &&
> +		   txq->mp2mr[i]->end >= addr))
>  		return txq->mp2mr[i]->lkey;
>  	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
> -		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
> -			/* Unknown MP, add a new MR for it. */
> +		if (txq->mp2mr[i] == NULL)
>  			break;
> -		}
> +		if (i == txq->mr_cache_idx)
> +			continue;
>  		if (txq->mp2mr[i]->start <= addr &&
>  		    txq->mp2mr[i]->end >= addr) {
> +#ifndef NDEBUG
> +			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +				assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey)
> +				       == txq->mp2mr[i]->lkey);
> +#endif
>  			txq->mr_cache_idx = i;
>  			return txq->mp2mr[i]->lkey;
>  		}
>  	}
> -	txq->mr_cache_idx = 0;
> -	mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
> -	/*
> -	 * Request the reference to use in this queue, the original one is
> -	 * kept by the control plane.
> -	 */
> -	if (mr) {
> -		rte_atomic32_inc(&mr->refcnt);
> -		return mr->lkey;
> +	mr = mlx5_mp2mr(container_of(txq, struct mlx5_txq_ctrl, txq)->priv,
> +			mlx5_tx_mb2mp(mb), addr);
> +	assert(mr);
> +	/* recent used MR first */
> +	if (i) {
> +		if (i >= RTE_DIM(txq->mp2mr)) {
> +			i = RTE_DIM(txq->mp2mr) - 1;
> +			DEBUG("TXQ MR cache full, please consider increasing CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE in config file");

Please respect the coding style of the PMD/file split those lines.

> +		}
> +		memmove(&txq->mp2mr[1], &txq->mp2mr[0],
> +			i * sizeof(sizeof(txq->mp2mr[0])));
>  	}
> -	return (uint32_t)-1;
> +	/* Store the new entry. */
> +	txq->mp2mr[0] = mr;
> +	txq->mr_cache_idx = 0;
> +	return mr ? mr->lkey : (uint32_t)-1;
>  }
>  
>  /**
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index 1a20967a2..920cc0f46 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,17 +58,10 @@ priv_txq_start(struct priv *priv)
>  
>  	/* Add memory regions to Tx queues. */
>  	for (i = 0; i != priv->txqs_n; ++i) {
> -		unsigned int idx = 0;
> -		struct mlx5_mr *mr;
>  		struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
>  
>  		if (!txq_ctrl)
>  			continue;
> -		LIST_FOREACH(mr, &priv->mr, next) {
> -			priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
> -			if (idx == MLX5_PMD_TX_MP_CACHE)
> -				break;
> -		}
>  		txq_alloc_elts(txq_ctrl);
>  		txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
>  		if (!txq_ctrl->ibv) {
> -- 
> 2.13.3
> 

Thanks,
diff mbox

Patch

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8ee522069..a6cd1474c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -316,9 +316,11 @@  int priv_socket_connect(struct priv *priv);
 
 /* mlx5_mr.c */
 
-struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_new(struct priv *priv, struct rte_mempool *mp,
+			    uintptr_t addr);
 struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
 int priv_mr_release(struct priv *, struct mlx5_mr *);
 int priv_mr_verify(struct priv *);
+struct mlx5_mr *priv_mr_lookup(struct priv *priv, uintptr_t addr);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 3c80fbb89..8d8fabea1 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -47,158 +47,89 @@ 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
 
-struct mlx5_check_mempool_data {
-	int ret;
-	char *start;
-	char *end;
+struct mlx5_mempool_cb_data {
+	uintptr_t addr;
+	struct priv *priv;
+	int error;
+	struct mlx5_mr *mr;
 };
 
-/* Called by mlx5_check_mempool() when iterating the memory chunks. */
+/* Called by priv_mr_new() when iterating the memory chunks. */
 static void
-mlx5_check_mempool_cb(struct rte_mempool *mp,
-		      void *opaque, struct rte_mempool_memhdr *memhdr,
-		      unsigned int mem_idx)
+mlx5_mempool_register_cb(struct rte_mempool *mp, void *opaque,
+			 struct rte_mempool_memhdr *memhdr,
+			 unsigned int mem_idx __rte_unused)
 {
-	struct mlx5_check_mempool_data *data = opaque;
-
-	(void)mp;
-	(void)mem_idx;
+	struct mlx5_mempool_cb_data *cb = opaque;
+	struct priv *priv = cb->priv;
+	struct mlx5_mr *mr;
 
-	/* It already failed, skip the next chunks. */
-	if (data->ret != 0)
+	if (cb->error) /* skip on previous error */
 		return;
-	/* It is the first chunk. */
-	if (data->start == NULL && data->end == NULL) {
-		data->start = memhdr->addr;
-		data->end = data->start + memhdr->len;
+	if (cb->addr && (cb->addr < (uintptr_t)memhdr->addr ||
+			 cb->addr > (uintptr_t)memhdr->addr + memhdr->len))
+		return; /* not target mem segment */
+	mr = priv_mr_lookup(priv, (uintptr_t)memhdr->addr);
+	if (mr) { /* memory already registered */
+		cb->mr = mr;
 		return;
 	}
-	if (data->end == memhdr->addr) {
-		data->end += memhdr->len;
+	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+	if (!mr) {
+		DEBUG("unable to allocate MR, pool %p:%u register failed.",
+		      (void *)mp, mem_idx);
+		cb->error = -ENOMEM;
 		return;
 	}
-	if (data->start == (char *)memhdr->addr + memhdr->len) {
-		data->start -= memhdr->len;
+	mr->mr = ibv_reg_mr(priv->pd, memhdr->addr, memhdr->len,
+			    IBV_ACCESS_LOCAL_WRITE);
+	if (!mr->mr) {
+		ERROR("unable to register MR, ibv_reg_mr() failed.");
+		cb->error = -1;
 		return;
 	}
-	/* Error, mempool is not virtually contiguous. */
-	data->ret = -1;
-}
-
-/**
- * Check if a mempool can be used: it must be virtually contiguous.
- *
- * @param[in] mp
- *   Pointer to memory pool.
- * @param[out] start
- *   Pointer to the start address of the mempool virtual memory area
- * @param[out] end
- *   Pointer to the end address of the mempool virtual memory area
- *
- * @return
- *   0 on success (mempool is virtually contiguous), -1 on error.
- */
-static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
-	uintptr_t *end)
-{
-	struct mlx5_check_mempool_data data;
-
-	memset(&data, 0, sizeof(data));
-	rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data);
-	*start = (uintptr_t)data.start;
-	*end = (uintptr_t)data.end;
-
-	return data.ret;
+	mr->mp = mp;
+	DEBUG("mempool %p:%u area start=%p size=%zu lkey=0x%08x",
+	      (void *)mp, mem_idx, memhdr->addr, memhdr->len, mr->mr->lkey);
+	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
+	mr->start = (uintptr_t)memhdr->addr;
+	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+	rte_atomic32_inc(&mr->refcnt);
+	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
+	      (void *)mr, rte_atomic32_read(&mr->refcnt));
+	LIST_INSERT_HEAD(&priv->mr, mr, next);
+	cb->mr = mr;
 }
 
 /**
- * Register a Memory Region (MR) <-> Memory Pool (MP) association in
- * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
- *
- * This function should only be called by txq_mp2mr().
+ * Lookup or register a Memory Region (MR).
  *
  * @param priv
- *   Pointer to private structure.
- * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
- * @param idx
- *   Index of the next available entry.
+ *   Pointer to priv structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @param addr
+ *   Address to register
  *
  * @return
  *   mr on success, NULL on failure.
  */
 struct mlx5_mr*
-priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
-		   struct rte_mempool *mp, unsigned int idx)
+mlx5_mp2mr(struct priv *priv, struct rte_mempool *mp, uintptr_t addr)
 {
-	struct mlx5_txq_ctrl *txq_ctrl =
-		container_of(txq, struct mlx5_txq_ctrl, txq);
 	struct mlx5_mr *mr;
 
-	/* Add a new entry, register MR first. */
-	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
-	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = priv_mr_get(priv, mp);
-	if (mr == NULL)
-		mr = priv_mr_new(priv, mp);
-	if (unlikely(mr == NULL)) {
-		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
-		      (void *)txq_ctrl);
-		return NULL;
-	}
-	if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
-		/* Table is full, remove oldest entry. */
-		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
-		      (void *)txq_ctrl);
-		--idx;
-		priv_mr_release(priv, txq->mp2mr[0]);
-		memmove(&txq->mp2mr[0], &txq->mp2mr[1],
-			(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
+	priv_lock(priv);
+	mr = priv_mr_lookup(priv, addr);
+	if (!mr) {
+		mr = priv_mr_new(priv, mp, addr);
+		if (mr)
+			rte_atomic32_inc(&mr->refcnt);
 	}
-	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx] = mr;
-	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
-	      (void *)txq_ctrl, mp->name, (void *)mp,
-	      txq_ctrl->txq.mp2mr[idx]->lkey);
-	return mr;
-}
-
-/**
- * Register a Memory Region (MR) <-> Memory Pool (MP) association in
- * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
- *
- * This function should only be called by txq_mp2mr().
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
- * @param idx
- *   Index of the next available entry.
- *
- * @return
- *   mr on success, NULL on failure.
- */
-struct mlx5_mr*
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
-		   unsigned int idx)
-{
-	struct mlx5_txq_ctrl *txq_ctrl =
-		container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct mlx5_mr *mr;
-
-	priv_lock(txq_ctrl->priv);
-	mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
-	priv_unlock(txq_ctrl->priv);
+	priv_unlock(priv);
 	return mr;
 }
 
-struct mlx5_mp2mr_mbuf_check_data {
-	int ret;
-};
-
 /**
  * Callback function for rte_mempool_obj_iter() to check whether a given
  * mempool object looks like a mbuf.
@@ -214,10 +145,10 @@  struct mlx5_mp2mr_mbuf_check_data {
  *   Object index, unused.
  */
 static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+mp2mr_mbuf_check_cb(struct rte_mempool *mp, void *arg, void *obj,
 	uint32_t index __rte_unused)
 {
-	struct mlx5_mp2mr_mbuf_check_data *data = arg;
+	struct mlx5_mempool_cb_data *data = arg;
 	struct rte_mbuf *buf = obj;
 
 	/*
@@ -225,7 +156,7 @@  txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
 	 * pointer is valid.
 	 */
 	if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
-		data->ret = -1;
+		data->error = -1;
 }
 
 /**
@@ -241,21 +172,14 @@  void
 mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
 {
 	struct priv *priv = (struct priv *)arg;
-	struct mlx5_mp2mr_mbuf_check_data data = {
-		.ret = 0,
-	};
-	struct mlx5_mr *mr;
+	struct mlx5_mempool_cb_data data = {0};
 
-	/* Register mempool only if the first element looks like a mbuf. */
-	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
-			data.ret == -1)
+	/* Register mempool only if all element looks like a mbuf. */
+	/* TODO less efficient to iterate all objects in pool */
+	if (rte_mempool_obj_iter(mp, mp2mr_mbuf_check_cb, &data) == 0 ||
+			data.error == -1)
 		return;
-	mr = priv_mr_get(priv, mp);
-	if (mr) {
-		priv_mr_release(priv, mr);
-		return;
-	}
-	priv_mr_new(priv, mp);
+	priv_mr_new(priv, mp, 0);
 }
 
 /**
@@ -266,59 +190,30 @@  mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
  *   Pointer to private structure.
  * @param mp
  *   Pointer to the memory pool to register.
+ * @param addr
+ *   Address to register
  * @return
- *   The memory region on success.
+ *   Registered MR or the last if multiple MR registered.
  */
-struct mlx5_mr*
-priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+struct mlx5_mr *
+priv_mr_new(struct priv *priv, struct rte_mempool *mp, uintptr_t addr)
 {
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
-	struct mlx5_mr *mr;
+	struct mlx5_mempool_cb_data cb_data = {
+		.priv = priv,
+		.addr = addr,
+	};
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		rte_panic("Please init mempool before rte_eth_dev_start() in primary process: %s",
 			  mp->name);
-	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
-	if (!mr) {
-		DEBUG("unable to configure MR, ibv_reg_mr() failed.");
-		return NULL;
-	}
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
+	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
+	      (void *)priv, mp->name, (void *)mp);
+	rte_mempool_mem_iter(mp, mlx5_mempool_register_cb, &cb_data);
+	if (cb_data.error) {
+		DEBUG("%p: unable to configure MR.", (void *)priv);
 		return NULL;
 	}
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-		uintptr_t addr = (uintptr_t)ms[i].addr;
-		size_t len = ms[i].len;
-		unsigned int align = ms[i].hugepage_sz;
-
-		if ((start > addr) && (start < addr + len))
-			start = RTE_ALIGN_FLOOR(start, align);
-		if ((end > addr) && (end < addr + len))
-			end = RTE_ALIGN_CEIL(end, align);
-	}
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
-			    IBV_ACCESS_LOCAL_WRITE);
-	mr->mp = mp;
-	mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
-	mr->start = start;
-	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
-	rte_atomic32_inc(&mr->refcnt);
-	DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
-	      (void *)mr, rte_atomic32_read(&mr->refcnt));
-	LIST_INSERT_HEAD(&priv->mr, mr, next);
-	return mr;
+	return cb_data.mr;
 }
 
 /**
@@ -396,3 +291,26 @@  priv_mr_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Memory Region (MR) lookup by address
+ *
+ * @param priv
+ *   Pointer to priv structure.
+ * @param[in] addr
+ *   Address to lookup.
+ * @return
+ *   mr on success, (uint32_t)-1 on failure.
+ */
+struct mlx5_mr *
+priv_mr_lookup(struct priv *priv, uintptr_t addr)
+{
+	struct mlx5_mr *mr;
+
+	LIST_FOREACH(mr, &priv->mr, next) {
+		if (addr >= mr->start && addr <= mr->end)
+			return mr;
+	}
+	return NULL;
+}
+
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 950472754..a581a2d61 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -652,6 +652,7 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	int ret = 0;
 	struct mlx5dv_obj obj;
 	struct mlx5_dev_config *config = &priv->config;
+	struct mlx5_mr *last_mr = NULL;
 
 	assert(rxq_data);
 	assert(!rxq_ctrl->ibv);
@@ -664,13 +665,9 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	}
 	tmpl->rxq_ctrl = rxq_ctrl;
 	/* Use the entire RX mempool as the memory region. */
-	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
-	if (!tmpl->mr) {
-		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
-		if (!tmpl->mr) {
-			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
-			goto error;
-		}
+	if (!priv_mr_new(priv, rxq_data->mp, 0)) {
+		ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+		goto error;
 	}
 	if (rxq_ctrl->irq) {
 		tmpl->channel = ibv_create_comp_channel(priv->ctx);
@@ -786,14 +783,17 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
 		struct rte_mbuf *buf = (*rxq_data->elts)[i];
 		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+		uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
 
+		if (!last_mr || addr < last_mr->start || addr > last_mr->end)
+			last_mr = priv_mr_lookup(priv, addr);
+		assert(last_mr);
 		/* scat->addr must be able to store a pointer. */
 		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
 		*scat = (struct mlx5_wqe_data_seg){
-			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-								  uintptr_t)),
+			.addr = rte_cpu_to_be_64(addr),
 			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-			.lkey = tmpl->mr->lkey,
+			.lkey = last_mr->lkey,
 		};
 	}
 	rxq_data->rq_db = rwq.dbrec;
@@ -826,8 +826,6 @@  mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 		claim_zero(ibv_destroy_cq(tmpl->cq));
 	if (tmpl->channel)
 		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
-	if (tmpl->mr)
-		priv_mr_release(priv, tmpl->mr);
 	return NULL;
 }
 
@@ -877,17 +875,12 @@  mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
 int
 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 {
-	int ret;
-
 	assert(rxq_ibv);
 	assert(rxq_ibv->wq);
 	assert(rxq_ibv->cq);
-	assert(rxq_ibv->mr);
-	ret = priv_mr_release(priv, rxq_ibv->mr);
-	if (!ret)
-		rxq_ibv->mr = NULL;
 	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
 	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+	(void)priv;
 	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
 		rxq_free_elts(rxq_ibv->rxq_ctrl);
 		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 18e1d26f3..6589a662e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -142,7 +142,6 @@  struct mlx5_rxq_ibv {
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_wq *wq; /* Work Queue. */
 	struct ibv_comp_channel *channel;
-	struct mlx5_mr *mr; /* Memory Region (for mp). */
 };
 
 /* RX queue control descriptor. */
@@ -324,10 +323,8 @@  uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
 /* mlx5_mr.c */
 
 void mlx5_mp2mr_iter(struct rte_mempool *, void *);
-struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
-				   struct rte_mempool *, unsigned int);
-struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
-				   unsigned int);
+struct mlx5_mr *mlx5_mp2mr(struct priv *priv, struct rte_mempool *mp,
+			   uintptr_t addr);
 
 #ifndef NDEBUG
 /**
@@ -523,7 +520,7 @@  mlx5_tx_complete(struct mlx5_txq_data *txq)
  * @return
  *   Memory pool where data is located for given mbuf.
  */
-static struct rte_mempool *
+static __rte_always_inline struct rte_mempool *
 mlx5_tx_mb2mp(struct rte_mbuf *buf)
 {
 	if (unlikely(RTE_MBUF_INDIRECT(buf)))
@@ -552,30 +549,41 @@  mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 	struct mlx5_mr *mr;
 
 	assert(i < RTE_DIM(txq->mp2mr));
-	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+	if (likely(txq->mp2mr[i] && txq->mp2mr[i]->start <= addr &&
+		   txq->mp2mr[i]->end >= addr))
 		return txq->mp2mr[i]->lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
-			/* Unknown MP, add a new MR for it. */
+		if (txq->mp2mr[i] == NULL)
 			break;
-		}
+		if (i == txq->mr_cache_idx)
+			continue;
 		if (txq->mp2mr[i]->start <= addr &&
 		    txq->mp2mr[i]->end >= addr) {
+#ifndef NDEBUG
+			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+				assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey)
+				       == txq->mp2mr[i]->lkey);
+#endif
 			txq->mr_cache_idx = i;
 			return txq->mp2mr[i]->lkey;
 		}
 	}
-	txq->mr_cache_idx = 0;
-	mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
-	/*
-	 * Request the reference to use in this queue, the original one is
-	 * kept by the control plane.
-	 */
-	if (mr) {
-		rte_atomic32_inc(&mr->refcnt);
-		return mr->lkey;
+	mr = mlx5_mp2mr(container_of(txq, struct mlx5_txq_ctrl, txq)->priv,
+			mlx5_tx_mb2mp(mb), addr);
+	assert(mr);
+	/* recent used MR first */
+	if (i) {
+		if (i >= RTE_DIM(txq->mp2mr)) {
+			i = RTE_DIM(txq->mp2mr) - 1;
+			DEBUG("TXQ MR cache full, please consider increasing CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE in config file");
+		}
+		memmove(&txq->mp2mr[1], &txq->mp2mr[0],
+			i * sizeof(sizeof(txq->mp2mr[0])));
 	}
-	return (uint32_t)-1;
+	/* Store the new entry. */
+	txq->mp2mr[0] = mr;
+	txq->mr_cache_idx = 0;
+	return mr ? mr->lkey : (uint32_t)-1;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 1a20967a2..920cc0f46 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,17 +58,10 @@  priv_txq_start(struct priv *priv)
 
 	/* Add memory regions to Tx queues. */
 	for (i = 0; i != priv->txqs_n; ++i) {
-		unsigned int idx = 0;
-		struct mlx5_mr *mr;
 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
 
 		if (!txq_ctrl)
 			continue;
-		LIST_FOREACH(mr, &priv->mr, next) {
-			priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
-			if (idx == MLX5_PMD_TX_MP_CACHE)
-				break;
-		}
 		txq_alloc_elts(txq_ctrl);
 		txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
 		if (!txq_ctrl->ibv) {