net/mlx5: fix async flow create error handling

Message ID 20240305180514.50520-1-dsosnowski@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: fix async flow create error handling |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-sample-apps-testing success Testing PASS

Commit Message

Dariusz Sosnowski March 5, 2024, 6:05 p.m. UTC
  Whenever processing of an asynchronous flow rule create operation failed
after some dynamic flow actions had already been allocated,
these actions were not freed by the error handling flow.
That behavior led to leaks, e.g., RSS/QUEUE action objects were leaked,
which triggered assertions during device cleanup.

This patch adds flow rule cleanup handling in case of an error
during async flow rule creation.

Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
Cc: suanmingm@nvidia.com
Cc: stable@dpdk.org

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_hw.c | 78 +++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 23 deletions(-)
  

Comments

Raslan Darawsheh March 13, 2024, 7:43 a.m. UTC | #1
Hi,
> -----Original Message-----
> From: Dariusz Sosnowski <dsosnowski@nvidia.com>
> Sent: Tuesday, March 5, 2024 8:05 PM
> To: Matan Azrad <matan@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Ori Kam <orika@nvidia.com>; Suanming Mou
> <suanmingm@nvidia.com>
> Cc: dev@dpdk.org; stable@dpdk.org
> Subject: [PATCH] net/mlx5: fix async flow create error handling
> 
> Whenever processing of an asynchronous flow rule create operation failed
> after some dynamic flow actions had already been allocated, these actions
> were not freed by the error handling flow.
> That behavior led to leaks, e.g., RSS/QUEUE action objects were leaked, which
> triggered assertions during device cleanup.
> 
> This patch adds flow rule cleanup handling in case of an error during async
> flow rule creation.
> 
> Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
> Cc: suanmingm@nvidia.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> Acked-by: Ori Kam <orika@nvidia.com>
Patch applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 4216433c6e..5a407d592c 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -174,7 +174,7 @@  mlx5_flow_hw_aux_set_mtr_id(struct rte_flow_hw *flow,
 		aux->orig.mtr_id = mtr_id;
 }
 
-static __rte_always_inline uint32_t __rte_unused
+static __rte_always_inline uint32_t
 mlx5_flow_hw_aux_get_mtr_id(struct rte_flow_hw *flow, struct rte_flow_hw_aux *aux)
 {
 	if (unlikely(flow->operation_type == MLX5_FLOW_HW_FLOW_OP_TYPE_UPDATE))
@@ -183,6 +183,10 @@  mlx5_flow_hw_aux_get_mtr_id(struct rte_flow_hw *flow, struct rte_flow_hw_aux *au
 		return aux->orig.mtr_id;
 }
 
+static void
+flow_hw_age_count_release(struct mlx5_priv *priv, uint32_t queue, struct rte_flow_hw *flow,
+			  struct rte_flow_error *error);
+
 static int
 mlx5_tbl_multi_pattern_process(struct rte_eth_dev *dev,
 			       struct rte_flow_template_table *tbl,
@@ -3034,6 +3038,31 @@  flow_hw_modify_field_construct(struct mlx5_modification_cmd *mhdr_cmd,
 	return 0;
 }
 
+/**
+ * Release any actions allocated for the flow rule during actions construction.
+ *
+ * @param[in] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_hw_release_actions(struct rte_eth_dev *dev,
+			uint32_t queue,
+			struct rte_flow_hw *flow)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
+	struct rte_flow_hw_aux *aux = mlx5_flow_hw_aux(dev->data->port_id, flow);
+
+	if (flow->flags & MLX5_FLOW_HW_FLOW_FLAG_FATE_JUMP)
+		flow_hw_jump_release(dev, flow->jump);
+	else if (flow->flags & MLX5_FLOW_HW_FLOW_FLAG_FATE_HRXQ)
+		mlx5_hrxq_obj_release(dev, flow->hrxq);
+	if (flow->flags & MLX5_FLOW_HW_FLOW_FLAG_CNT_ID)
+		flow_hw_age_count_release(priv, queue, flow, NULL);
+	if (flow->flags & MLX5_FLOW_HW_FLOW_FLAG_MTR_ID)
+		mlx5_ipool_free(pool->idx_pool, mlx5_flow_hw_aux_get_mtr_id(flow, aux));
+}
+
 /**
  * Construct flow action array.
  *
@@ -3156,7 +3185,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 					(dev, queue, action, table, it_idx,
 					 at->action_flags, flow,
 					 &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
@@ -3176,7 +3205,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			jump = flow_hw_jump_action_register
 				(dev, &table->cfg, jump_group, NULL);
 			if (!jump)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 			(!!attr.group) ? jump->hws_action : jump->root_action;
 			flow->jump = jump;
@@ -3188,7 +3217,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 					ft_flag,
 					action);
 			if (!hrxq)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action = hrxq->action;
 			flow->hrxq = hrxq;
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_FATE_HRXQ;
@@ -3198,19 +3227,19 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			if (flow_hw_shared_action_get
 				(dev, act_data, item_flags,
 				 &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
 			enc_item = ((const struct rte_flow_action_vxlan_encap *)
 				   action->conf)->definition;
 			if (flow_dv_convert_encap_data(enc_item, ap->encap_data, &encap_len, NULL))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
 			enc_item = ((const struct rte_flow_action_nvgre_encap *)
 				   action->conf)->definition;
 			if (flow_dv_convert_encap_data(enc_item, ap->encap_data, &encap_len, NULL))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
 			raw_encap_data =
@@ -3238,12 +3267,12 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 								     hw_acts,
 								     action);
 			if (ret)
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT:
 			port_action = action->conf;
 			if (!priv->hw_vport[port_action->port_id])
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 					priv->hw_vport[port_action->port_id];
 			break;
@@ -3263,7 +3292,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			jump = flow_hw_jump_action_register
 				(dev, &table->cfg, aso_mtr->fm.group, NULL);
 			if (!jump)
-				return -1;
+				goto error;
 			MLX5_ASSERT
 				(!rule_acts[act_data->action_dst + 1].action);
 			rule_acts[act_data->action_dst + 1].action =
@@ -3272,7 +3301,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			flow->jump = jump;
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_FATE_JUMP;
 			if (mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_AGE:
 			aux = mlx5_flow_hw_aux(dev->data->port_id, flow);
@@ -3288,7 +3317,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 							     flow->res_idx,
 							     error);
 			if (age_idx == 0)
-				return -rte_errno;
+				goto error;
 			mlx5_flow_hw_aux_set_age_idx(flow, aux, age_idx);
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_AGE_IDX;
 			if (at->action_flags & MLX5_FLOW_ACTION_INDIRECT_COUNT)
@@ -3303,7 +3332,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
 			ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
 			if (ret != 0)
-				return ret;
+				goto error;
 			ret = mlx5_hws_cnt_pool_get_action_offset
 				(priv->hws_cpool,
 				 cnt_id,
@@ -3311,7 +3340,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 				 &rule_acts[act_data->action_dst].counter.offset
 				 );
 			if (ret != 0)
-				return ret;
+				goto error;
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_CNT_ID;
 			flow->cnt_id = cnt_id;
 			break;
@@ -3323,7 +3352,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 				 &rule_acts[act_data->action_dst].counter.offset
 				 );
 			if (ret != 0)
-				return ret;
+				goto error;
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_CNT_ID;
 			flow->cnt_id = act_data->shared_counter.id;
 			break;
@@ -3331,7 +3360,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			ct_idx = MLX5_INDIRECT_ACTION_IDX_GET(action->conf);
 			if (flow_hw_ct_compile(dev, queue, ct_idx,
 					       &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK:
 			mtr_id = act_data->shared_meter.id &
@@ -3339,7 +3368,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 			/* Find ASO object. */
 			aso_mtr = mlx5_ipool_get(pool->idx_pool, mtr_id);
 			if (!aso_mtr)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 							pool->action;
 			rule_acts[act_data->action_dst].aso_meter.offset =
@@ -3354,7 +3383,7 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 				act_data->action_dst, action,
 				rule_acts, &mtr_idx, MLX5_HW_INV_QUEUE, error);
 			if (ret != 0)
-				return ret;
+				goto error;
 			aux = mlx5_flow_hw_aux(dev->data->port_id, flow);
 			mlx5_flow_hw_aux_set_mtr_id(flow, aux, mtr_idx);
 			flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_MTR_ID;
@@ -3396,11 +3425,11 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 		struct mlx5dr_rule_action *ra = &rule_acts[hw_acts->encap_decap_pos];
 
 		if (ix < 0)
-			return -1;
+			goto error;
 		if (!mp_segment)
 			mp_segment = mlx5_multi_pattern_segment_find(table, flow->res_idx);
 		if (!mp_segment || !mp_segment->reformat_action[ix])
-			return -1;
+			goto error;
 		ra->action = mp_segment->reformat_action[ix];
 		/* reformat offset is relative to selected DR action */
 		ra->reformat.offset = flow->res_idx - mp_segment->head_index;
@@ -3416,6 +3445,11 @@  flow_hw_actions_construct(struct rte_eth_dev *dev,
 		flow->cnt_id = hw_acts->cnt_id;
 	}
 	return 0;
+
+error:
+	flow_hw_release_actions(dev, queue, flow);
+	rte_errno = EINVAL;
+	return -rte_errno;
 }
 
 static const struct rte_flow_item *
@@ -3565,10 +3599,8 @@  flow_hw_async_flow_create(struct rte_eth_dev *dev,
 	if (flow_hw_actions_construct(dev, flow, &ap,
 				      &table->ats[action_template_index],
 				      pattern_template_index, actions,
-				      rule_acts, queue, error)) {
-		rte_errno = EINVAL;
+				      rule_acts, queue, error))
 		goto error;
-	}
 	rule_items = flow_hw_get_rule_items(dev, table, items,
 					    pattern_template_index, &pp);
 	if (!rule_items)