[V1,2/2] net/mlx5: use traffic class PRM field for IPv6 modification

Message ID 20240112075055.1288263-3-gavinl@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series use traffic class PRM field for IPv6 modification |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Gavin Li Jan. 12, 2024, 7:50 a.m. UTC
  The new PRM defines a new field, OUT_IPV6_TRAFFIC_CLASS, for IPv6, which
is used for both IPv6 ECN and DSCP modification. A new capability bit,
modify_outer_ipv6_traffic_class, is added. It can be used to check whether
the new field is supported by FW.

However, IPv6 ECN and DSCP start at different bit offsets within the same
byte. Update SWS and HWS to use the new field, and introduce an extra offset
for the IPv6 DSCP data and mask to solve the issue.

Signed-off-by: Gavin Li <gavinl@nvidia.com>
Acked-by: Suanming Mou <suanmingm@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  3 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h       |  8 ++-
 drivers/net/mlx5/linux/mlx5_os.c     |  5 +-
 drivers/net/mlx5/mlx5_flow.h         |  3 ++
 drivers/net/mlx5/mlx5_flow_dv.c      | 78 ++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow_hw.c      |  7 +++
 7 files changed, 92 insertions(+), 13 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 4d8818924a..3a894f894a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1229,6 +1229,9 @@  mlx5_devx_cmd_query_hca_attr(void *ctx,
 	attr->modify_outer_ip_ecn = MLX5_GET
 		(flow_table_nic_cap, hcattr,
 		 ft_header_modify_nic_receive.outer_ip_ecn);
+	attr->modify_outer_ipv6_traffic_class = MLX5_GET
+		(flow_table_nic_cap, hcattr,
+		 ft_header_modify_nic_receive.outer_ipv6_traffic_class);
 	attr->set_reg_c = 0xffff;
 	if (attr->nic_flow_table) {
 #define GET_RX_REG_X_BITS \
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 7f23e925a5..4a6008dc1a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -304,6 +304,7 @@  struct mlx5_hca_attr {
 	uint32_t set_reg_c:16;
 	uint32_t nic_flow_table:1;
 	uint32_t modify_outer_ip_ecn:1;
+	uint32_t modify_outer_ipv6_traffic_class:1;
 	union {
 		uint32_t max_flow_counter;
 		struct {
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 0d46ba9c40..69404b5ed8 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -848,6 +848,7 @@  enum mlx5_modification_field {
 	MLX5_MODI_META_REG_C_13 = 0x94,
 	MLX5_MODI_META_REG_C_14 = 0x95,
 	MLX5_MODI_META_REG_C_15 = 0x96,
+	MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS = 0x11C,
 	MLX5_MODI_OUT_IPV4_TOTAL_LEN = 0x11D,
 	MLX5_MODI_OUT_IPV6_PAYLOAD_LEN = 0x11E,
 	MLX5_MODI_OUT_IPV4_IHL = 0x11F,
@@ -2202,7 +2203,9 @@  struct mlx5_ifc_ft_fields_support_bits {
 		u8 metadata_reg_c_x[0x8];
 	}; /* end of DW3 */
 	/* set_action_field_support_2 */
-	u8 reserved_at_80[0x80];
+	u8 reserved_at_80[0x37];
+	u8 outer_ipv6_traffic_class[0x1];
+	u8 reserved_at_B8[0x48];
 	/* add_action_field_support */
 	u8 reserved_at_100[0x80];
 	/* add_action_field_support_2 */
@@ -2240,7 +2243,8 @@  struct mlx5_ifc_ft_fields_support_2_bits {
 	u8 inner_l4_checksum_ok[0x1];
 	u8 outer_ipv4_checksum_ok[0x1];
 	u8 outer_l4_checksum_ok[0x1]; /* end of DW0 */
-	u8 reserved_at_20[0x18];
+	u8 reserved_at_20[0x17];
+	u8 outer_ipv6_traffic_class[0x1];
 	union {
 		struct {
 			u8 metadata_reg_c_15[0x1];
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5ae31c88f4..6ea0296109 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1602,9 +1602,10 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 	}
 	rte_rwlock_init(&priv->ind_tbls_lock);
-	if (sh->config.dv_flow_en == 1 &&
+	if (!priv->sh->cdev->config.hca_attr.modify_outer_ipv6_traffic_class ||
+	    (sh->config.dv_flow_en == 1 &&
 	    !priv->sh->ipv6_tc_fallback &&
-	    mlx5_flow_discover_ipv6_tc_support(eth_dev))
+	    mlx5_flow_discover_ipv6_tc_support(eth_dev)))
 		priv->sh->ipv6_tc_fallback = 1;
 	if (priv->sh->config.dv_flow_en == 2) {
 #ifdef HAVE_MLX5_HWS_SUPPORT
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 33d4a28077..fe4f46724b 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -413,6 +413,9 @@  enum mlx5_feature_name {
 #define IPPROTO_MPLS 137
 #endif
 
+#define MLX5_IPV6_HDR_ECN_MASK 0x3
+#define MLX5_IPV6_HDR_DSCP_SHIFT 2
+
 /* UDP port number for MPLS */
 #define MLX5_UDP_PORT_MPLS 6635
 
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 97f55003c3..ecf86d861d 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -253,6 +253,11 @@  struct field_modify_info modify_ipv6[] = {
 	{0, 0, 0},
 };
 
+struct field_modify_info modify_ipv6_traffic_class[] = {
+	{1,  0, MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS},
+	{0, 0, 0},
+};
+
 struct field_modify_info modify_udp[] = {
 	{2, 0, MLX5_MODI_OUT_UDP_SPORT},
 	{2, 2, MLX5_MODI_OUT_UDP_DPORT},
@@ -1323,6 +1328,7 @@  static int
 flow_dv_convert_action_modify_ipv6_dscp
 			(struct mlx5_flow_dv_modify_hdr_resource *resource,
 			 const struct rte_flow_action *action,
+			 uint32_t ipv6_tc_off,
 			 struct rte_flow_error *error)
 {
 	const struct rte_flow_action_set_dscp *conf =
@@ -1330,6 +1336,7 @@  flow_dv_convert_action_modify_ipv6_dscp
 	struct rte_flow_item item = { .type = RTE_FLOW_ITEM_TYPE_IPV6 };
 	struct rte_flow_item_ipv6 ipv6;
 	struct rte_flow_item_ipv6 ipv6_mask;
+	struct field_modify_info *modify_info;
 
 	memset(&ipv6, 0, sizeof(ipv6));
 	memset(&ipv6_mask, 0, sizeof(ipv6_mask));
@@ -1338,12 +1345,19 @@  flow_dv_convert_action_modify_ipv6_dscp
 	 * rdma-core only accept the DSCP bits byte aligned start from
 	 * bit 0 to 5 as to be compatible with IPv4. No need to shift the
 	 * bits in IPv6 case as rdma-core requires byte aligned value.
+	 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+	 * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+	 * it's needed to distinguish DSCP from ECN in data field construct
 	 */
-	ipv6.hdr.vtc_flow = conf->dscp;
-	ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> 22;
+	ipv6.hdr.vtc_flow = conf->dscp << ipv6_tc_off;
+	ipv6_mask.hdr.vtc_flow = RTE_IPV6_HDR_DSCP_MASK >> (22 - ipv6_tc_off);
 	item.spec = &ipv6;
 	item.mask = &ipv6_mask;
-	return flow_dv_convert_modify_action(&item, modify_ipv6, NULL, resource,
+	if (ipv6_tc_off)
+		modify_info = modify_ipv6_traffic_class;
+	else
+		modify_info = modify_ipv6;
+	return flow_dv_convert_modify_action(&item, modify_info, NULL, resource,
 					     MLX5_MODIFICATION_TYPE_SET, error);
 }
 
@@ -1576,6 +1590,12 @@  mlx5_modify_flex_item(const struct rte_eth_dev *dev,
 	}
 }
 
+static inline bool
+mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv)
+{
+	return !priv->sh->ipv6_tc_fallback;
+}
+
 void
 mlx5_flow_field_id_to_modify_info
 		(const struct rte_flow_action_modify_data *data,
@@ -1731,9 +1751,20 @@  mlx5_flow_field_id_to_modify_info
 		break;
 	case RTE_FLOW_FIELD_IPV6_DSCP:
 		MLX5_ASSERT(data->offset + width <= 6);
-		off_be = 6 - (data->offset + width);
-		info[idx] = (struct field_modify_info){1, 0,
-					MLX5_MODI_OUT_IP_DSCP};
+		/*
+		 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+		 * bits left. Shift the mask left for IPV6 DSCP. Do it here because
+		 * it's needed to distinguish DSCP from ECN in data field construct
+		 */
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(priv)) {
+			off_be = 6 - (data->offset + width) + MLX5_IPV6_HDR_DSCP_SHIFT;
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+		} else {
+			off_be = 6 - (data->offset + width);
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IP_DSCP};
+		}
 		if (mask)
 			mask[idx] = flow_modify_info_mask_8(width, off_be);
 		else
@@ -2029,7 +2060,6 @@  mlx5_flow_field_id_to_modify_info
 		}
 		break;
 	case RTE_FLOW_FIELD_IPV4_ECN:
-	case RTE_FLOW_FIELD_IPV6_ECN:
 		MLX5_ASSERT(data->offset + width <= 2);
 		off_be = 2 - (data->offset + width);
 		info[idx] = (struct field_modify_info){1, 0,
@@ -2039,6 +2069,20 @@  mlx5_flow_field_id_to_modify_info
 		else
 			info[idx].offset = off_be;
 		break;
+	case RTE_FLOW_FIELD_IPV6_ECN:
+		MLX5_ASSERT(data->offset + width <= 2);
+		off_be = 2 - (data->offset + width);
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS};
+		else
+			info[idx] = (struct field_modify_info){1, 0,
+						MLX5_MODI_OUT_IP_ECN};
+		if (mask)
+			mask[idx] = flow_modify_info_mask_8(width, off_be);
+		else
+			info[idx].offset = off_be;
+		break;
 	case RTE_FLOW_FIELD_GTP_PSC_QFI:
 		MLX5_ASSERT(data->offset + width <= 8);
 		off_be = data->offset + 8;
@@ -2161,7 +2205,7 @@  flow_dv_convert_action_modify_field
 	struct field_modify_info dcopy[MLX5_ACT_MAX_MOD_FIELDS] = {
 								{0, 0, 0} };
 	uint32_t mask[MLX5_ACT_MAX_MOD_FIELDS] = {0, 0, 0, 0, 0};
-	uint32_t type, meta = 0;
+	uint32_t type, meta = 0, dscp = 0;
 
 	if (conf->src.field == RTE_FLOW_FIELD_POINTER ||
 	    conf->src.field == RTE_FLOW_FIELD_VALUE) {
@@ -2181,6 +2225,17 @@  flow_dv_convert_action_modify_field
 			meta = rte_cpu_to_be_32(meta);
 			item.spec = &meta;
 		}
+		if (mlx5_dv_modify_ipv6_traffic_class_supported(dev->data->dev_private) &&
+		    conf->dst.field == RTE_FLOW_FIELD_IPV6_DSCP &&
+		    !(mask[0] & MLX5_IPV6_HDR_ECN_MASK)) {
+			dscp = *(const unaligned_uint32_t *)item.spec;
+			/*
+			 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+			 * bits left. Shift the data left for IPV6 DSCP
+			 */
+			dscp <<= MLX5_IPV6_HDR_DSCP_SHIFT;
+			item.spec = &dscp;
+		}
 	} else {
 		type = MLX5_MODIFICATION_TYPE_COPY;
 		/** For COPY fill the destination field (dcopy) without mask. */
@@ -14385,6 +14440,7 @@  flow_dv_translate(struct rte_eth_dev *dev,
 	struct mlx5_flow_sub_actions_list *sample_act;
 	uint32_t sample_act_pos = UINT32_MAX;
 	uint32_t age_act_pos = UINT32_MAX;
+	uint32_t ipv6_tc_off = 0;
 	uint32_t num_of_dest = 0;
 	int tmp_actions_n = 0;
 	uint32_t table;
@@ -14941,8 +14997,12 @@  flow_dv_translate(struct rte_eth_dev *dev,
 			action_flags |= MLX5_FLOW_ACTION_SET_IPV4_DSCP;
 			break;
 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
+			if (mlx5_dv_modify_ipv6_traffic_class_supported(priv))
+				ipv6_tc_off = MLX5_IPV6_HDR_DSCP_SHIFT;
+			else
+				ipv6_tc_off = 0;
 			if (flow_dv_convert_action_modify_ipv6_dscp(mhdr_res,
-							      actions, error))
+							      actions, ipv6_tc_off, error))
 				return -rte_errno;
 			action_flags |= MLX5_FLOW_ACTION_SET_IPV6_DSCP;
 			break;
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index c4a90a3690..504a250e44 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2862,6 +2862,13 @@  flow_hw_modify_field_construct(struct mlx5_hw_q_job *job,
 		}
 		off_b = rte_bsf32(mask);
 		data = flow_dv_fetch_field(values + field->offset, field->size);
+		/*
+		 * IPV6 DSCP uses OUT_IPV6_TRAFFIC_CLASS as ID but it starts from 2
+		 * bits left. Shift the data left for IPV6 DSCP
+		 */
+		if (field->id == MLX5_MODI_OUT_IPV6_TRAFFIC_CLASS &&
+		    !(mask & MLX5_IPV6_HDR_ECN_MASK))
+			data <<= MLX5_IPV6_HDR_DSCP_SHIFT;
 		data = (data & mask) >> off_b;
 		job->mhdr_cmd[i++].data1 = rte_cpu_to_be_32(data);
 		++field;