net/mlx5: fix GENEVE resource management

Message ID 20221116093654.19165-1-suanmingm@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: fix GENEVE resource management |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS

Commit Message

Suanming Mou Nov. 16, 2022, 9:36 a.m. UTC
  The item translation split causes GENEVE TLV option resource register
function flow_dev_geneve_tlv_option_resource_register() to be called
twice incorrectly both in spec and mask translation.

In SWS mode the refcnt will only be decreased by 1 in flow release.
The refcnt will never be 0 again, it causes the resource be leaked.
In HWS mode the resource is allocated as global, the refcnt should
not be increased after the resource be allocated. And the resource
should be released during PMD exists.

This commit fixes GENEVE resource management.

Fixes: 75a00812b18f ("net/mlx5: add hardware steering item translation")
Fixes: cd4ab742064a ("net/mlx5: split flow item matcher and value translation")

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5.c         |  8 +++++++-
 drivers/net/mlx5/mlx5_flow.h    |  2 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 31 ++++++++++++++++++-------------
 3 files changed, 27 insertions(+), 14 deletions(-)
  

Comments

Raslan Darawsheh Nov. 17, 2022, 8:38 a.m. UTC | #1
Hi,

> -----Original Message-----
> From: Suanming Mou <suanmingm@nvidia.com>
> Sent: Wednesday, November 16, 2022 11:37 AM
> To: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>
> Cc: dev@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>
> Subject: [PATCH] net/mlx5: fix GENEVE resource management
> 
> The item translation split causes GENEVE TLV option resource register
> function flow_dev_geneve_tlv_option_resource_register() to be called
> twice incorrectly both in spec and mask translation.
> 
> In SWS mode the refcnt will only be decreased by 1 in flow release.
> The refcnt will never be 0 again, it causes the resource be leaked.
> In HWS mode the resource is allocated as global, the refcnt should
> not be increased after the resource be allocated. And the resource
> should be released during PMD exists.
> 
> This commit fixes GENEVE resource management.
> 
> Fixes: 75a00812b18f ("net/mlx5: add hardware steering item translation")
> Fixes: cd4ab742064a ("net/mlx5: split flow item matcher and value
> translation")
> 
> Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b3efdad293..6a0d66247a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1757,7 +1757,13 @@  mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
 	} while (++i < sh->bond.n_port);
 	if (sh->td)
 		claim_zero(mlx5_devx_cmd_destroy(sh->td));
-	MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
+#ifdef HAVE_MLX5_HWS_SUPPORT
+	/* HWS manages geneve_tlv_option resource as global. */
+	if (sh->config.dv_flow_en == 2)
+		flow_dev_geneve_tlv_option_resource_release(sh);
+	else
+#endif
+		MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
 	pthread_mutex_destroy(&sh->txpp.mutex);
 	mlx5_lwm_unset(sh);
 	mlx5_free(sh);
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 94e0ac99b9..1f57ecd6e1 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -2484,6 +2484,8 @@  struct mlx5_aso_age_action *flow_aso_age_get_by_idx(struct rte_eth_dev *dev,
 int flow_dev_geneve_tlv_option_resource_register(struct rte_eth_dev *dev,
 					     const struct rte_flow_item *item,
 					     struct rte_flow_error *error);
+void flow_dev_geneve_tlv_option_resource_release(struct mlx5_dev_ctx_shared *sh);
+
 void flow_release_workspace(void *data);
 int mlx5_flow_os_init_workspace_once(void);
 void *mlx5_flow_os_get_specific_workspace(void);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index bc9a75f225..a9357096f5 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9493,9 +9493,13 @@  flow_dev_geneve_tlv_option_resource_register(struct rte_eth_dev *dev,
 			geneve_opt_v->option_type &&
 			geneve_opt_resource->length ==
 			geneve_opt_v->option_len) {
-			/* We already have GENEVE TLV option obj allocated. */
-			__atomic_fetch_add(&geneve_opt_resource->refcnt, 1,
-					   __ATOMIC_RELAXED);
+			/*
+			 * We already have GENEVE TLV option obj allocated.
+			 * Increasing refcnt only in SWS. HWS uses it as global.
+			 */
+			if (priv->sh->config.dv_flow_en == 1)
+				__atomic_fetch_add(&geneve_opt_resource->refcnt, 1,
+						   __ATOMIC_RELAXED);
 		} else {
 			ret = rte_flow_error_set(error, ENOMEM,
 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -9571,11 +9575,14 @@  flow_dv_translate_item_geneve_opt(struct rte_eth_dev *dev, void *key,
 		return -1;
 	MLX5_ITEM_UPDATE(item, key_type, geneve_opt_v, geneve_opt_m,
 			 &rte_flow_item_geneve_opt_mask);
-	ret = flow_dev_geneve_tlv_option_resource_register(dev, item,
-							   error);
-	if (ret) {
-		DRV_LOG(ERR, "Failed to create geneve_tlv_obj");
-		return ret;
+	/* Register resource requires item spec. */
+	if (key_type & MLX5_SET_MATCHER_V) {
+		ret = flow_dev_geneve_tlv_option_resource_register(dev, item,
+								   error);
+		if (ret) {
+			DRV_LOG(ERR, "Failed to create geneve_tlv_obj");
+			return ret;
+		}
 	}
 	/*
 	 * Set the option length in GENEVE header if not requested.
@@ -15226,11 +15233,9 @@  flow_dv_dest_array_resource_release(struct rte_eth_dev *dev,
 				    &resource->entry);
 }
 
-static void
-flow_dv_geneve_tlv_option_resource_release(struct rte_eth_dev *dev)
+void
+flow_dev_geneve_tlv_option_resource_release(struct mlx5_dev_ctx_shared *sh)
 {
-	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource =
 				sh->geneve_tlv_option_resource;
 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
@@ -15318,7 +15323,7 @@  flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 	else if (flow->age)
 		flow_dv_aso_age_release(dev, flow->age);
 	if (flow->geneve_tlv_option) {
-		flow_dv_geneve_tlv_option_resource_release(dev);
+		flow_dev_geneve_tlv_option_resource_release(priv->sh);
 		flow->geneve_tlv_option = 0;
 	}
 	while (flow->dev_handles) {