[1/5] net/mlx5: fix race condition in counter pool resizing

Message ID 20221031160824.330200-2-michaelba@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: some counter fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Oct. 31, 2022, 4:08 p.m. UTC
  The counter management structure has an array of counter pools. This array
is not valid at management structure initialization and grows on demand.

The resizing includes:
1. Allocate memory for the new size.
2. Copy the existing data to the new memory.
3. Move the pointer to the new memory.
4. Free the old memory.

The third step can be performed before the second one in this function, and
the compiler may do that, but then another thread might read the pointer
before the copying and read invalid data or even crash.

This patch allocates memory for this array once in management structure
initialization and limits the number of counters to 16M.

Fixes: 3aa279157fa0 ("net/mlx5: synchronize flow counter pool creation")
Cc: suanmingm@nvidia.com
Cc: stable@dpdk.org

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5.c            | 28 +++++++++++++---
 drivers/net/mlx5/mlx5.h            |  7 ++--
 drivers/net/mlx5/mlx5_flow.c       | 24 +++++++-------
 drivers/net/mlx5/mlx5_flow_dv.c    | 53 +++++-------------------------
 drivers/net/mlx5/mlx5_flow_verbs.c | 23 +++----------
 5 files changed, 52 insertions(+), 83 deletions(-)
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 78234b116c..b85a56ec24 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -561,18 +561,34 @@  mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
  *
  * @param[in] sh
  *   Pointer to mlx5_dev_ctx_shared object to free
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
  */
-static void
+static int
 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 {
 	int i, j;
 
 	if (sh->config.dv_flow_en < 2) {
+		void *pools;
+
+		pools = mlx5_malloc(MLX5_MEM_ZERO,
+				    sizeof(struct mlx5_flow_counter_pool *) *
+				    MLX5_COUNTER_POOLS_MAX_NUM,
+				    0, SOCKET_ID_ANY);
+		if (!pools) {
+			DRV_LOG(ERR,
+				"Counter management allocation was failed.");
+			rte_errno = ENOMEM;
+			return -rte_errno;
+		}
 		memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
 		TAILQ_INIT(&sh->sws_cmng.flow_counters);
 		sh->sws_cmng.min_id = MLX5_CNT_BATCH_OFFSET;
 		sh->sws_cmng.max_id = -1;
 		sh->sws_cmng.last_pool_idx = POOL_IDX_INVALID;
+		sh->sws_cmng.pools = pools;
 		rte_spinlock_init(&sh->sws_cmng.pool_update_sl);
 		for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
 			TAILQ_INIT(&sh->sws_cmng.counters[i]);
@@ -598,6 +614,7 @@  mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 		sh->hws_max_log_bulk_sz = log_dcs;
 		sh->hws_max_nb_counters = max_nb_cnts;
 	}
+	return 0;
 }
 
 /**
@@ -655,8 +672,7 @@  mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
 					claim_zero
 					 (mlx5_flow_os_destroy_flow_action
 					  (cnt->action));
-				if (fallback && MLX5_POOL_GET_CNT
-				    (pool, j)->dcs_when_free)
+				if (fallback && cnt->dcs_when_free)
 					claim_zero(mlx5_devx_cmd_destroy
 						   (cnt->dcs_when_free));
 			}
@@ -1572,8 +1588,12 @@  mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
 		if (err)
 			goto error;
 	}
+	err = mlx5_flow_counters_mng_init(sh);
+	if (err) {
+		DRV_LOG(ERR, "Fail to initialize counters manage.");
+		goto error;
+	}
 	mlx5_flow_aging_init(sh);
-	mlx5_flow_counters_mng_init(sh);
 	mlx5_flow_ipool_create(sh);
 	/* Add context to the global device list. */
 	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9fcb71b69..cbe2d88b9e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -386,11 +386,10 @@  struct mlx5_hw_q {
 } __rte_cache_aligned;
 
 
-
-
+#define MLX5_COUNTER_POOLS_MAX_NUM (1 << 15)
 #define MLX5_COUNTERS_PER_POOL 512
 #define MLX5_MAX_PENDING_QUERIES 4
-#define MLX5_CNT_CONTAINER_RESIZE 64
+#define MLX5_CNT_MR_ALLOC_BULK 64
 #define MLX5_CNT_SHARED_OFFSET 0x80000000
 #define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \
 			   MLX5_CNT_BATCH_OFFSET)
@@ -549,7 +548,6 @@  TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool);
 /* Counter global management structure. */
 struct mlx5_flow_counter_mng {
 	volatile uint16_t n_valid; /* Number of valid pools. */
-	uint16_t n; /* Number of pools. */
 	uint16_t last_pool_idx; /* Last used pool index */
 	int min_id; /* The minimum counter ID in the pools. */
 	int max_id; /* The maximum counter ID in the pools. */
@@ -621,6 +619,7 @@  struct mlx5_aso_age_action {
 };
 
 #define MLX5_ASO_AGE_ACTIONS_PER_POOL 512
+#define MLX5_ASO_AGE_CONTAINER_RESIZE 64
 
 struct mlx5_aso_age_pool {
 	struct mlx5_devx_obj *flow_hit_aso_obj;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8e7d649d15..e25154199f 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -9063,7 +9063,7 @@  mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	volatile struct flow_counter_stats *raw_data;
-	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+	int raws_n = MLX5_CNT_MR_ALLOC_BULK + MLX5_MAX_PENDING_QUERIES;
 	int size = (sizeof(struct flow_counter_stats) *
 			MLX5_COUNTERS_PER_POOL +
 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
@@ -9101,7 +9101,7 @@  mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
 	}
 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
 		LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws,
-				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+				 mem_mng->raws + MLX5_CNT_MR_ALLOC_BULK + i,
 				 next);
 	LIST_INSERT_HEAD(&sh->sws_cmng.mem_mngs, mem_mng, next);
 	sh->sws_cmng.mem_mng = mem_mng;
@@ -9125,14 +9125,13 @@  mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
 {
 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
 	/* Resize statistic memory once used out. */
-	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
+	if (!(pool->index % MLX5_CNT_MR_ALLOC_BULK) &&
 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
 		return -1;
 	}
 	rte_spinlock_lock(&pool->sl);
-	pool->raw = cmng->mem_mng->raws + pool->index %
-		    MLX5_CNT_CONTAINER_RESIZE;
+	pool->raw = cmng->mem_mng->raws + pool->index % MLX5_CNT_MR_ALLOC_BULK;
 	rte_spinlock_unlock(&pool->sl);
 	pool->raw_hw = NULL;
 	return 0;
@@ -9174,13 +9173,13 @@  void
 mlx5_flow_query_alarm(void *arg)
 {
 	struct mlx5_dev_ctx_shared *sh = arg;
-	int ret;
-	uint16_t pool_index = sh->sws_cmng.pool_index;
 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
+	uint16_t pool_index = cmng->pool_index;
 	struct mlx5_flow_counter_pool *pool;
 	uint16_t n_valid;
+	int ret;
 
-	if (sh->sws_cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
+	if (cmng->pending_queries >= MLX5_MAX_PENDING_QUERIES)
 		goto set_alarm;
 	rte_spinlock_lock(&cmng->pool_update_sl);
 	pool = cmng->pools[pool_index];
@@ -9192,8 +9191,7 @@  mlx5_flow_query_alarm(void *arg)
 	if (pool->raw_hw)
 		/* There is a pool query in progress. */
 		goto set_alarm;
-	pool->raw_hw =
-		LIST_FIRST(&sh->sws_cmng.free_stat_raws);
+	pool->raw_hw = LIST_FIRST(&cmng->free_stat_raws);
 	if (!pool->raw_hw)
 		/* No free counter statistics raw memory. */
 		goto set_alarm;
@@ -9219,12 +9217,12 @@  mlx5_flow_query_alarm(void *arg)
 		goto set_alarm;
 	}
 	LIST_REMOVE(pool->raw_hw, next);
-	sh->sws_cmng.pending_queries++;
+	cmng->pending_queries++;
 	pool_index++;
 	if (pool_index >= n_valid)
 		pool_index = 0;
 set_alarm:
-	sh->sws_cmng.pool_index = pool_index;
+	cmng->pool_index = pool_index;
 	mlx5_set_query_alarm(sh);
 }
 
@@ -9755,7 +9753,7 @@  mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
 	}
 
 	/* get counter */
-	MLX5_ASSERT(cmng->n_valid <= cmng->n);
+	MLX5_ASSERT(cmng->n_valid <= MLX5_COUNTER_POOLS_MAX_NUM);
 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
 	for (j = 1; j <= max; j++) {
 		action = NULL;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 1e52278191..e77cbb862b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -6091,7 +6091,7 @@  flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
 
 	/* Decrease to original index and clear shared bit. */
 	idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
-	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n);
+	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < MLX5_COUNTER_POOLS_MAX_NUM);
 	pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
 	if (ppool)
@@ -6167,39 +6167,6 @@  flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id)
 	return pool;
 }
 
-/**
- * Resize a counter container.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- *
- * @return
- *   0 on success, otherwise negative errno value and rte_errno is set.
- */
-static int
-flow_dv_container_resize(struct rte_eth_dev *dev)
-{
-	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
-	void *old_pools = cmng->pools;
-	uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE;
-	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
-	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
-
-	if (!pools) {
-		rte_errno = ENOMEM;
-		return -ENOMEM;
-	}
-	if (old_pools)
-		memcpy(pools, old_pools, cmng->n *
-				       sizeof(struct mlx5_flow_counter_pool *));
-	cmng->n = resize;
-	cmng->pools = pools;
-	if (old_pools)
-		mlx5_free(old_pools);
-	return 0;
-}
-
 /**
  * Query a devx flow counter.
  *
@@ -6251,8 +6218,6 @@  _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  *   The devX counter handle.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
- * @param[in/out] cont_cur
- *   Pointer to the container pointer, it will be update in pool resize.
  *
  * @return
  *   The pool container pointer on success, NULL otherwise and rte_errno is set.
@@ -6264,9 +6229,14 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
-	bool fallback = priv->sh->sws_cmng.counter_fallback;
+	bool fallback = cmng->counter_fallback;
 	uint32_t size = sizeof(*pool);
 
+	if (cmng->n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
+		DRV_LOG(ERR, "All counter is in used, try again later.");
+		rte_errno = EAGAIN;
+		return NULL;
+	}
 	size += MLX5_COUNTERS_PER_POOL * MLX5_CNT_SIZE;
 	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_AGE_SIZE);
 	pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
@@ -6285,11 +6255,6 @@  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 	pool->time_of_last_age_check = MLX5_CURR_TIME_SEC;
 	rte_spinlock_lock(&cmng->pool_update_sl);
 	pool->index = cmng->n_valid;
-	if (pool->index == cmng->n && flow_dv_container_resize(dev)) {
-		mlx5_free(pool);
-		rte_spinlock_unlock(&cmng->pool_update_sl);
-		return NULL;
-	}
 	cmng->pools[pool->index] = pool;
 	cmng->n_valid++;
 	if (unlikely(fallback)) {
@@ -12511,7 +12476,7 @@  flow_dv_aso_age_release(struct rte_eth_dev *dev, uint32_t age_idx)
 }
 
 /**
- * Resize the ASO age pools array by MLX5_CNT_CONTAINER_RESIZE pools.
+ * Resize the ASO age pools array by MLX5_ASO_AGE_CONTAINER_RESIZE pools.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -12525,7 +12490,7 @@  flow_dv_aso_age_pools_resize(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
 	void *old_pools = mng->pools;
-	uint32_t resize = mng->n + MLX5_CNT_CONTAINER_RESIZE;
+	uint32_t resize = mng->n + MLX5_ASO_AGE_CONTAINER_RESIZE;
 	uint32_t mem_size = sizeof(struct mlx5_aso_age_pool *) * resize;
 	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 81a33ddf09..4bca685674 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -232,27 +232,14 @@  flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t id __rte_unused)
 			break;
 	}
 	if (!cnt) {
-		struct mlx5_flow_counter_pool **pools;
 		uint32_t size;
 
-		if (n_valid == cmng->n) {
-			/* Resize the container pool array. */
-			size = sizeof(struct mlx5_flow_counter_pool *) *
-				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
-			pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
-					    SOCKET_ID_ANY);
-			if (!pools)
-				return 0;
-			if (n_valid) {
-				memcpy(pools, cmng->pools,
-				       sizeof(struct mlx5_flow_counter_pool *) *
-				       n_valid);
-				mlx5_free(cmng->pools);
-			}
-			cmng->pools = pools;
-			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
+		if (n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
+			DRV_LOG(ERR, "All counter is in used, try again later.");
+			rte_errno = EAGAIN;
+			return 0;
 		}
-		/* Allocate memory for new pool*/
+		/* Allocate memory for new pool */
 		size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
 		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
 		if (!pool)