[dpdk-dev,v2,07/15] net/mlx5: support tunnel RSS level

Message ID 20180410133415.189905-8-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Xueming Li April 10, 2018, 1:34 p.m. UTC
  The tunnel RSS level of the flow RSS action lets the user choose whether the
RSS hash is calculated on inner or outer RSS fields. Testpmd flow command examples:

GRE flow inner RSS:
  flow create 0 ingress pattern eth / ipv4 proto is 47 / gre / end
actions rss queues 1 2 end level 1 / end

GRE tunnel flow outer RSS:
  flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
actions rss queues 1 2 end level 0 / end

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/Makefile    |   2 +-
 drivers/net/mlx5/mlx5_flow.c | 249 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_glue.c |  16 +++
 drivers/net/mlx5/mlx5_glue.h |   8 ++
 drivers/net/mlx5/mlx5_rxq.c  |  46 +++++++-
 drivers/net/mlx5/mlx5_rxtx.h |   5 +-
 6 files changed, 246 insertions(+), 80 deletions(-)
  

Comments

Nélio Laranjeiro April 11, 2018, 8:55 a.m. UTC | #1
On Tue, Apr 10, 2018 at 09:34:07PM +0800, Xueming Li wrote:
> Tunnel RSS level of flow RSS action offers user a choice to do RSS hash
> calculation on inner or outer RSS fields. Testpmd flow command examples:
> 
> GRE flow inner RSS:
>   flow create 0 ingress pattern eth / ipv4 proto is 47 / gre / end
> actions rss queues 1 2 end level 1 / end
> 
> GRE tunnel flow outer RSS:
>   flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
> actions rss queues 1 2 end level 0 / end
> 
> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/Makefile    |   2 +-
>  drivers/net/mlx5/mlx5_flow.c | 249 ++++++++++++++++++++++++++++++-------------
>  drivers/net/mlx5/mlx5_glue.c |  16 +++
>  drivers/net/mlx5/mlx5_glue.h |   8 ++
>  drivers/net/mlx5/mlx5_rxq.c  |  46 +++++++-
>  drivers/net/mlx5/mlx5_rxtx.h |   5 +-
>  6 files changed, 246 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index ae118ad33..f9a6c460b 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -35,7 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  LIB = librte_pmd_mlx5.a
>  LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
>  LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
> -LIB_GLUE_VERSION = 18.02.0
> +LIB_GLUE_VERSION = 18.05.0
>  
>  # Sources.
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 64658bc0e..66c7d7993 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -113,6 +113,7 @@ enum hash_rxq_type {
>  	HASH_RXQ_UDPV6,
>  	HASH_RXQ_IPV6,
>  	HASH_RXQ_ETH,
> +	HASH_RXQ_TUNNEL,
>  };
>  
>  /* Initialization data for hash RX queue. */
> @@ -451,6 +452,7 @@ struct mlx5_flow_parse {
>  	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
>  	uint8_t rss_key[40]; /**< copy of the RSS key. */
>  	enum hash_rxq_type layer; /**< Last pattern layer detected. */
> +	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
>  	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
>  	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
>  	struct {
> @@ -458,6 +460,7 @@ struct mlx5_flow_parse {
>  		/**< Pointer to Verbs attributes. */
>  		unsigned int offset;
>  		/**< Current position or total size of the attribute. */
> +		uint64_t hash_fields; /**< Verbs hash fields. */
>  	} queue[RTE_DIM(hash_rxq_init)];
>  };
>  
> @@ -698,7 +701,8 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  						   " function is Toeplitz");
>  				return -rte_errno;
>  			}
> -			if (rss->level) {
> +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +			if (parser->rss_conf.level > 0) {

According to Adrien's API, level 0 means "do whatever you want" and level 1 means outer.
This change removes the outer RSS support.

>  				rte_flow_error_set(error, EINVAL,
>  						   RTE_FLOW_ERROR_TYPE_ACTION,
>  						   actions,
> @@ -706,6 +710,15 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  						   " level is not supported");
>  				return -rte_errno;
>  			}
> +#endif
> +			if (parser->rss_conf.level > 1) {
> +				rte_flow_error_set(error, EINVAL,
> +						   RTE_FLOW_ERROR_TYPE_ACTION,
> +						   actions,
> +						   "RSS encapsulation level"
> +						   " > 1 is not supported");
> +				return -rte_errno;
> +			}

It seems the levels are used incorrectly.

>  			if (rss->types & MLX5_RSS_HF_MASK) {
>  				rte_flow_error_set(error, EINVAL,
>  						   RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -756,7 +769,7 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  			}
>  			parser->rss_conf = (struct rte_flow_action_rss){
>  				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> -				.level = 0,
> +				.level = rss->level,
>  				.types = rss->types,
>  				.key_len = rss_key_len,
>  				.queue_num = rss->queue_num,
> @@ -842,11 +855,12 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_items_validate(struct rte_eth_dev *dev __rte_unused,
> +mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
>  				 const struct rte_flow_item items[],
>  				 struct rte_flow_error *error,
>  				 struct mlx5_flow_parse *parser)
>  {
> +	struct priv *priv = dev->data->dev_private;
>  	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
>  	unsigned int i;
>  	int ret = 0;
> @@ -886,6 +900,14 @@ mlx5_flow_convert_items_validate(struct rte_eth_dev *dev __rte_unused,
>  						   " tunnel encapsulations.");
>  				return -rte_errno;
>  			}
> +			if (!priv->config.tunnel_en &&
> +			    parser->rss_conf.level) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					RTE_FLOW_ERROR_TYPE_ITEM,
> +					items,
> +					"Tunnel offloading not enabled");

I would suggest "RSS on tunnel is not supported".

> +				return -rte_errno;
> +			}
>  			parser->inner = IBV_FLOW_SPEC_INNER;
>  			parser->tunnel = flow_ptype[items->type];
>  		}
> @@ -993,7 +1015,11 @@ static void
>  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  {
>  	unsigned int i;
> +	uint32_t inner = parser->inner;
>  
> +	/* Don't create extra flows for outer RSS. */
> +	if (parser->tunnel && !parser->rss_conf.level)
> +		return;
>  	/* Remove any other flow not matching the pattern. */
>  	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
>  		for (i = 0; i != hash_rxq_init_n; ++i) {
> @@ -1014,23 +1040,25 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  			struct ibv_flow_spec_ipv4_ext ipv4;
>  			struct ibv_flow_spec_ipv6 ipv6;
>  			struct ibv_flow_spec_tcp_udp udp_tcp;
> +			struct ibv_flow_spec_eth eth;
>  		} specs;
>  		void *dst;
>  		uint16_t size;
>  
>  		if (i == parser->layer)
>  			continue;
> -		if (parser->layer == HASH_RXQ_ETH) {
> +		if (parser->layer == HASH_RXQ_ETH ||
> +		    parser->layer == HASH_RXQ_TUNNEL) {
>  			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
>  				size = sizeof(struct ibv_flow_spec_ipv4_ext);
>  				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
> -					.type = IBV_FLOW_SPEC_IPV4_EXT,
> +					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
>  					.size = size,
>  				};
>  			} else {
>  				size = sizeof(struct ibv_flow_spec_ipv6);
>  				specs.ipv6 = (struct ibv_flow_spec_ipv6){
> -					.type = IBV_FLOW_SPEC_IPV6,
> +					.type = inner | IBV_FLOW_SPEC_IPV6,
>  					.size = size,
>  				};
>  			}
> @@ -1047,7 +1075,7 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
>  			size = sizeof(struct ibv_flow_spec_tcp_udp);
>  			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
> -				.type = ((i == HASH_RXQ_UDPV4 ||
> +				.type = inner | ((i == HASH_RXQ_UDPV4 ||
>  					  i == HASH_RXQ_UDPV6) ?
>  					 IBV_FLOW_SPEC_UDP :
>  					 IBV_FLOW_SPEC_TCP),
> @@ -1068,6 +1096,8 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  /**
>   * Update flows according to pattern and RSS hash fields.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in, out] parser
>   *   Internal parser structure.
>   *
> @@ -1075,20 +1105,63 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
> +mlx5_flow_convert_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser)
>  {
> -	const unsigned int ipv4 =
> +	unsigned int ipv4 =
>  		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
>  	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
>  	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
>  	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
>  	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
> -	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> +	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
>  	unsigned int i;
> +	int found = 0;
>  
> -	if (parser->layer == HASH_RXQ_ETH)
> +	/*
> +	 * Outer RSS.
> +	 * HASH_RXQ_ETH is the only rule since tunnel packet match this
> +	 * rule must match outer pattern.
> +	 */
> +	if (parser->tunnel && !parser->rss_conf.level) {
> +		/* Remove flows other than default. */
> +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
> +		ipv4 = hash_rxq_init[parser->out_layer].ip_version == MLX5_IPV4;
> +		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> +		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &
> +		    parser->rss_conf.types) {
> +			parser->queue[HASH_RXQ_ETH].hash_fields =
> +				hash_rxq_init[parser->out_layer].hash_fields;
> +		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &
> +		    parser->rss_conf.types)) {
> +			parser->queue[HASH_RXQ_ETH].hash_fields =
> +				hash_rxq_init[ip].hash_fields;
> +		} else if (parser->rss_conf.types) {
> +			DRV_LOG(WARNING,
> +				"port %u rss outer hash function doesn't match"
> +				" pattern", dev->data->port_id);

Hash function, what do you mean?  It seems to be the layers on which
RSS is configured that do not match the pattern.

Honestly, if I saw such a warning I would seriously doubt that
the rule had been accepted and applied.
"port 0 rss outer hash function doesn't match pattern" --> what will
happen to the packets matching such a flow?  Will they be dropped?
This is not helping at all, so please remove it.

> +		}
> +		return 0;
> +	}
> +	if (parser->layer == HASH_RXQ_ETH || parser->layer == HASH_RXQ_TUNNEL) {
> +		/* Remove unused flows according to hash function. */
> +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> +			if (!parser->queue[i].ibv_attr)
> +				continue;
> +			if (hash_rxq_init[i].dpdk_rss_hf &
> +			    parser->rss_conf.types) {
> +				parser->queue[i].hash_fields =
> +					hash_rxq_init[i].hash_fields;
> +				continue;
> +			}
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
>  		return 0;
> -	/* This layer becomes useless as the pattern define under layers. */
> +	}
> +	/* Remove ETH layer flow. */
>  	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
>  	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
>  	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
> @@ -1098,9 +1171,52 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
>  		rte_free(parser->queue[i].ibv_attr);
>  		parser->queue[i].ibv_attr = NULL;
>  	}
> -	/* Remove impossible flow according to the RSS configuration. */
> -	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
> -	    parser->rss_conf.types) {
> +	/*
> +	 * Keep L4 flows as IP pattern has to support L4 RSS.
> +	 * Otherwise, only keep the flow that match the pattern.
> +	 */

This comment is not clear, please re-word it.

> +	if (parser->layer != ip) {
> +		/* Only keep the flow that match the pattern. */
> +		for (i = hmin; i != (hmax + 1); ++i) {
> +			if (i == parser->layer)
> +				continue;
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
> +	}
> +	if (parser->rss_conf.types) {
> +		/* Remove impossible flow according to the RSS configuration. */
> +		for (i = hmin; i != (hmax + 1); ++i) {
> +			if (!parser->queue[i].ibv_attr)
> +				continue;
> +			if (parser->rss_conf.types &
> +			    hash_rxq_init[i].dpdk_rss_hf) {
> +				parser->queue[i].hash_fields =
> +					hash_rxq_init[i].hash_fields;
> +				found = 1;
> +				continue;
> +			}
> +			/* L4 flow could be used for L3 RSS. */
> +			if (i == parser->layer && i < ip &&
> +			    (hash_rxq_init[ip].dpdk_rss_hf &
> +			     parser->rss_conf.types)) {
> +				parser->queue[i].hash_fields =
> +					hash_rxq_init[ip].hash_fields;
> +				found = 1;
> +				continue;
> +			}
> +			/* L3 flow and L4 hash: non-rss L3 flow. */
> +			if (i == parser->layer && i == ip && found)
> +				/* IP pattern and L4 HF. */
> +				continue;
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
> +		if (!found)
> +			DRV_LOG(WARNING,
> +				"port %u rss hash function doesn't match "
> +				"pattern", dev->data->port_id);

Ditto.

> +	} else {
>  		/* Remove any other flow. */
>  		for (i = hmin; i != (hmax + 1); ++i) {
>  			if (i == parser->layer || !parser->queue[i].ibv_attr)
> @@ -1108,8 +1224,6 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
>  			rte_free(parser->queue[i].ibv_attr);
>  			parser->queue[i].ibv_attr = NULL;
>  		}
> -	} else if (!parser->queue[ip].ibv_attr) {
> -		/* no RSS possible with the current configuration. */
>  		parser->rss_conf.queue_num = 1;
>  	}
>  	return 0;
> @@ -1179,10 +1293,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		for (i = 0; i != hash_rxq_init_n; ++i) {
>  			unsigned int offset;
>  
> -			if (!(parser->rss_conf.types &
> -			      hash_rxq_init[i].dpdk_rss_hf) &&
> -			    (i != HASH_RXQ_ETH))
> -				continue;
>  			offset = parser->queue[i].offset;
>  			parser->queue[i].ibv_attr =
>  				mlx5_flow_convert_allocate(offset, error);
> @@ -1194,6 +1304,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	/* Third step. Conversion parse, fill the specifications. */
>  	parser->inner = 0;
>  	parser->tunnel = 0;
> +	parser->layer = HASH_RXQ_ETH;
>  	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
>  		struct mlx5_flow_data data = {
>  			.parser = parser,
> @@ -1211,23 +1322,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		if (ret)
>  			goto exit_free;
>  	}
> -	if (parser->mark)
> -		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> -	if (parser->count && parser->create) {
> -		mlx5_flow_create_count(dev, parser);
> -		if (!parser->cs)
> -			goto exit_count_error;
> -	}
>  	/*
>  	 * Last step. Complete missing specification to reach the RSS
>  	 * configuration.
>  	 */
>  	if (!parser->drop)
> -		ret = mlx5_flow_convert_rss(parser);
> +		ret = mlx5_flow_convert_rss(dev, parser);
>  		if (ret)
>  			goto exit_free;
>  		mlx5_flow_convert_finalise(parser);
>  	mlx5_flow_update_priority(dev, parser, attr);
> +	if (parser->mark)
> +		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> +	if (parser->count && parser->create) {
> +		mlx5_flow_create_count(dev, parser);
> +		if (!parser->cs)
> +			goto exit_count_error;
> +	}

Why do you need to move this code?

>  exit_free:
>  	/* Only verification is expected, all resources should be released. */
>  	if (!parser->create) {
> @@ -1275,17 +1386,11 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
>  		if (!parser->queue[i].ibv_attr)
>  			continue;
> -		/* Specification must be the same l3 type or none. */
> -		if (parser->layer == HASH_RXQ_ETH ||
> -		    (hash_rxq_init[parser->layer].ip_version ==
> -		     hash_rxq_init[i].ip_version) ||
> -		    (hash_rxq_init[i].ip_version == 0)) {
> -			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> -					parser->queue[i].offset);
> -			memcpy(dst, src, size);
> -			++parser->queue[i].ibv_attr->num_of_specs;
> -			parser->queue[i].offset += size;
> -		}
> +		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> +				parser->queue[i].offset);
> +		memcpy(dst, src, size);
> +		++parser->queue[i].ibv_attr->num_of_specs;
> +		parser->queue[i].offset += size;
>  	}
>  }
>  
> @@ -1316,9 +1421,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
>  		.size = eth_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_ETH;
> +	parser->layer = HASH_RXQ_ETH;
>  	if (spec) {
>  		unsigned int i;
>  
> @@ -1431,9 +1534,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
>  		.size = ipv4_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_IPV4;
> +	parser->layer = HASH_RXQ_IPV4;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1486,9 +1587,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
>  		.size = ipv6_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_IPV6;
> +	parser->layer = HASH_RXQ_IPV6;
>  	if (spec) {
>  		unsigned int i;
>  		uint32_t vtc_flow_val;
> @@ -1561,13 +1660,10 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
>  		.size = udp_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner) {
> -		if (parser->layer == HASH_RXQ_IPV4)
> -			parser->layer = HASH_RXQ_UDPV4;
> -		else
> -			parser->layer = HASH_RXQ_UDPV6;
> -	}
> +	if (parser->layer == HASH_RXQ_IPV4)
> +		parser->layer = HASH_RXQ_UDPV4;
> +	else
> +		parser->layer = HASH_RXQ_UDPV6;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1610,13 +1706,10 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
>  		.size = tcp_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner) {
> -		if (parser->layer == HASH_RXQ_IPV4)
> -			parser->layer = HASH_RXQ_TCPV4;
> -		else
> -			parser->layer = HASH_RXQ_TCPV6;
> -	}
> +	if (parser->layer == HASH_RXQ_IPV4)
> +		parser->layer = HASH_RXQ_TCPV4;
> +	else
> +		parser->layer = HASH_RXQ_TCPV6;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1666,6 +1759,8 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
>  	id.vni[0] = 0;
>  	parser->inner = IBV_FLOW_SPEC_INNER;
>  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
> +	parser->out_layer = parser->layer;
> +	parser->layer = HASH_RXQ_TUNNEL;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1720,6 +1815,8 @@ mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
>  
>  	parser->inner = IBV_FLOW_SPEC_INNER;
>  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
> +	parser->out_layer = parser->layer;
> +	parser->layer = HASH_RXQ_TUNNEL;
>  	mlx5_flow_create_copy(parser, &tunnel, size);
>  	return 0;
>  }
> @@ -1883,33 +1980,33 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
>  	unsigned int i;
>  
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
> -		uint64_t hash_fields;
> -
>  		if (!parser->queue[i].ibv_attr)
>  			continue;
>  		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
>  		parser->queue[i].ibv_attr = NULL;
> -		hash_fields = hash_rxq_init[i].hash_fields;
> +		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
>  		if (!priv->dev->data->dev_started)
>  			continue;
>  		flow->frxq[i].hrxq =
>  			mlx5_hrxq_get(dev,
>  				      parser->rss_conf.key,
>  				      parser->rss_conf.key_len,
> -				      hash_fields,
> +				      flow->frxq[i].hash_fields,
>  				      parser->rss_conf.queue,
>  				      parser->rss_conf.queue_num,
> -				      parser->tunnel);
> +				      parser->tunnel,
> +				      parser->rss_conf.level);
>  		if (flow->frxq[i].hrxq)
>  			continue;
>  		flow->frxq[i].hrxq =
>  			mlx5_hrxq_new(dev,
>  				      parser->rss_conf.key,
>  				      parser->rss_conf.key_len,
> -				      hash_fields,
> +				      flow->frxq[i].hash_fields,
>  				      parser->rss_conf.queue,
>  				      parser->rss_conf.queue_num,
> -				      parser->tunnel);
> +				      parser->tunnel,
> +				      parser->rss_conf.level);
>  		if (!flow->frxq[i].hrxq) {
>  			return rte_flow_error_set(error, ENOMEM,
>  						  RTE_FLOW_ERROR_TYPE_HANDLE,
> @@ -2006,7 +2103,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
>  		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
>  			dev->data->port_id,
>  			(void *)flow, i,
> -			(void *)flow->frxq[i].hrxq,
> +			(void *)flow->frxq[i].hrxq->qp,
>  			(void *)flow->frxq[i].ibv_flow);
>  	}
>  	if (!flows_n) {
> @@ -2532,19 +2629,21 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  			flow->frxq[i].hrxq =
>  				mlx5_hrxq_get(dev, flow->rss_conf.key,
>  					      flow->rss_conf.key_len,
> -					      hash_rxq_init[i].hash_fields,
> +					      flow->frxq[i].hash_fields,
>  					      flow->rss_conf.queue,
>  					      flow->rss_conf.queue_num,
> -					      flow->tunnel);
> +					      flow->tunnel,
> +					      flow->rss_conf.level);
>  			if (flow->frxq[i].hrxq)
>  				goto flow_create;
>  			flow->frxq[i].hrxq =
>  				mlx5_hrxq_new(dev, flow->rss_conf.key,
>  					      flow->rss_conf.key_len,
> -					      hash_rxq_init[i].hash_fields,
> +					      flow->frxq[i].hash_fields,
>  					      flow->rss_conf.queue,
>  					      flow->rss_conf.queue_num,
> -					      flow->tunnel);
> +					      flow->tunnel,
> +					      flow->rss_conf.level);
>  			if (!flow->frxq[i].hrxq) {
>  				DRV_LOG(DEBUG,
>  					"port %u flow %p cannot be applied",
> diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
> index be684d378..6874aa32a 100644
> --- a/drivers/net/mlx5/mlx5_glue.c
> +++ b/drivers/net/mlx5/mlx5_glue.c
> @@ -313,6 +313,21 @@ mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type)
>  	return mlx5dv_init_obj(obj, obj_type);
>  }
>  
> +static struct ibv_qp *
> +mlx5_glue_dv_create_qp(struct ibv_context *context,
> +		       struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> +		       struct mlx5dv_qp_init_attr *dv_qp_init_attr)
> +{
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	return mlx5dv_create_qp(context, qp_init_attr_ex, dv_qp_init_attr);
> +#else
> +	(void)context;
> +	(void)qp_init_attr_ex;
> +	(void)dv_qp_init_attr;
> +	return NULL;
> +#endif
> +}
> +
>  const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
>  	.version = MLX5_GLUE_VERSION,
>  	.fork_init = mlx5_glue_fork_init,
> @@ -356,4 +371,5 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
>  	.dv_query_device = mlx5_glue_dv_query_device,
>  	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
>  	.dv_init_obj = mlx5_glue_dv_init_obj,
> +	.dv_create_qp = mlx5_glue_dv_create_qp,
>  };
> diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
> index b5efee3b6..841363872 100644
> --- a/drivers/net/mlx5/mlx5_glue.h
> +++ b/drivers/net/mlx5/mlx5_glue.h
> @@ -31,6 +31,10 @@ struct ibv_counter_set_init_attr;
>  struct ibv_query_counter_set_attr;
>  #endif
>  
> +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +struct mlx5dv_qp_init_attr;
> +#endif
> +
>  /* LIB_GLUE_VERSION must be updated every time this structure is modified. */
>  struct mlx5_glue {
>  	const char *version;
> @@ -106,6 +110,10 @@ struct mlx5_glue {
>  				   enum mlx5dv_set_ctx_attr_type type,
>  				   void *attr);
>  	int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
> +	struct ibv_qp *(*dv_create_qp)
> +		(struct ibv_context *context,
> +		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> +		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
>  };
>  
>  const struct mlx5_glue *mlx5_glue;
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 073732e16..6e5565fb2 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
>   *   Number of queues.
>   * @param tunnel
>   *   Tunnel type.
> + * @param rss_level
> + *   RSS hash on tunnel level.
>   *
>   * @return
>   *   The Verbs object initialised, NULL otherwise and rte_errno is set.
> @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *
>  mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> +	      const uint16_t *queues, uint32_t queues_n,
> +	      uint32_t tunnel, uint32_t rss_level)

tunnel and rss_level seem to be redundant here.

rss_level > 1 is equivalent to tunnel, so there is no need to have both.

>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
>  	struct mlx5_ind_table_ibv *ind_tbl;
>  	struct ibv_qp *qp;
>  	int err;
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	struct mlx5dv_qp_init_attr qp_init_attr = {0};
> +#endif
>  
>  	queues_n = hash_fields ? queues_n : 1;
>  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
> @@ -1410,6 +1416,33 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  		rte_errno = ENOMEM;
>  		return NULL;
>  	}
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	if (tunnel) {
> +		qp_init_attr.comp_mask =
> +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
> +		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
> +	}
> +	qp = mlx5_glue->dv_create_qp(
> +		priv->ctx,
> +		&(struct ibv_qp_init_attr_ex){
> +			.qp_type = IBV_QPT_RAW_PACKET,
> +			.comp_mask =
> +				IBV_QP_INIT_ATTR_PD |
> +				IBV_QP_INIT_ATTR_IND_TABLE |
> +				IBV_QP_INIT_ATTR_RX_HASH,
> +			.rx_hash_conf = (struct ibv_rx_hash_conf){
> +				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> +				.rx_hash_key_len = rss_key_len,
> +				.rx_hash_key = (void *)(uintptr_t)rss_key,
> +				.rx_hash_fields_mask = hash_fields |
> +					(tunnel && rss_level ?
> +					(uint32_t)IBV_RX_HASH_INNER : 0),
> +			},
> +			.rwq_ind_tbl = ind_tbl->ind_table,
> +			.pd = priv->pd,
> +		},
> +		&qp_init_attr);
> +#else
>  	qp = mlx5_glue->create_qp_ex
>  		(priv->ctx,
>  		 &(struct ibv_qp_init_attr_ex){
> @@ -1427,6 +1460,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  			.rwq_ind_tbl = ind_tbl->ind_table,
>  			.pd = priv->pd,
>  		 });
> +#endif
>  	if (!qp) {
>  		rte_errno = errno;
>  		goto error;
> @@ -1439,6 +1473,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	hrxq->rss_key_len = rss_key_len;
>  	hrxq->hash_fields = hash_fields;
>  	hrxq->tunnel = tunnel;
> +	hrxq->rss_level = rss_level;
>  	memcpy(hrxq->rss_key, rss_key, rss_key_len);
>  	rte_atomic32_inc(&hrxq->refcnt);
>  	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
> @@ -1448,6 +1483,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	return hrxq;
>  error:
>  	err = rte_errno; /* Save rte_errno before cleanup. */
> +	DRV_LOG(ERR, "port %u: Error creating Hash Rx queue",
> +		dev->data->port_id);

This is a developer log, please remove it: for the user, the flow won't be
created and the correct error will be reported.

>  	mlx5_ind_table_ibv_release(dev, ind_tbl);
>  	if (qp)
>  		claim_zero(mlx5_glue->destroy_qp(qp));
> @@ -1469,6 +1506,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>   *   Number of queues.
>   * @param tunnel
>   *   Tunnel type.
> + * @param rss_level
> + *   RSS hash on tunnel level
>   *
>   * @return
>   *   An hash Rx queue on success.
> @@ -1477,7 +1516,8 @@ struct mlx5_hrxq *
>  mlx5_hrxq_get(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> +	      const uint16_t *queues, uint32_t queues_n,
> +	      uint32_t tunnel, uint32_t rss_level)

Ditto.

>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
> @@ -1494,6 +1534,8 @@ mlx5_hrxq_get(struct rte_eth_dev *dev,
>  			continue;
>  		if (hrxq->tunnel != tunnel)
>  			continue;
> +		if (hrxq->rss_level != rss_level)
> +			continue;
>  		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
>  		if (!ind_tbl)
>  			continue;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index d35605b55..62cf55109 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -147,6 +147,7 @@ struct mlx5_hrxq {
>  	struct ibv_qp *qp; /* Verbs queue pair. */
>  	uint64_t hash_fields; /* Verbs Hash fields. */
>  	uint32_t tunnel; /* Tunnel type. */
> +	uint32_t rss_level; /* RSS on tunnel level. */
>  	uint32_t rss_key_len; /* Hash key length in bytes. */
>  	uint8_t rss_key[]; /* Hash key. */
>  };
> @@ -251,12 +252,12 @@ struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
>  				const uint16_t *queues, uint32_t queues_n,
> -				uint32_t tunnel);
> +				uint32_t tunnel, uint32_t rss_level);
>  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
>  				const uint16_t *queues, uint32_t queues_n,
> -				uint32_t tunnel);
> +				uint32_t tunnel, uint32_t rss_level);
>  int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
>  int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
>  uint64_t mlx5_get_rx_port_offloads(void);
> -- 
> 2.13.3
> 

Thanks,
  
Xueming Li April 14, 2018, 12:25 p.m. UTC | #2
> -----Original Message-----

> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>

> Sent: Wednesday, April 11, 2018 4:55 PM

> To: Xueming(Steven) Li <xuemingl@mellanox.com>

> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org

> Subject: Re: [PATCH v2 07/15] net/mlx5: support tunnel RSS level

> 

> On Tue, Apr 10, 2018 at 09:34:07PM +0800, Xueming Li wrote:

> > Tunnel RSS level of flow RSS action offers user a choice to do RSS

> > hash calculation on inner or outer RSS fields. Testpmd flow command

> examples:

> >

> > GRE flow inner RSS:

> >   flow create 0 ingress pattern eth / ipv4 proto is 47 / gre / end

> > actions rss queues 1 2 end level 1 / end

> >

> > GRE tunnel flow outer RSS:

> >   flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end

> > actions rss queues 1 2 end level 0 / end

> >

> > Signed-off-by: Xueming Li <xuemingl@mellanox.com>

> > ---

> >  drivers/net/mlx5/Makefile    |   2 +-

> >  drivers/net/mlx5/mlx5_flow.c | 249

> > ++++++++++++++++++++++++++++++-------------

> >  drivers/net/mlx5/mlx5_glue.c |  16 +++

> >  drivers/net/mlx5/mlx5_glue.h |   8 ++

> >  drivers/net/mlx5/mlx5_rxq.c  |  46 +++++++-

> >  drivers/net/mlx5/mlx5_rxtx.h |   5 +-

> >  6 files changed, 246 insertions(+), 80 deletions(-)

> >

> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile

> > index ae118ad33..f9a6c460b 100644

> > --- a/drivers/net/mlx5/Makefile

> > +++ b/drivers/net/mlx5/Makefile

> > @@ -35,7 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk  LIB =

> > librte_pmd_mlx5.a  LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)

> >  LIB_GLUE_BASE = librte_pmd_mlx5_glue.so -LIB_GLUE_VERSION = 18.02.0

> > +LIB_GLUE_VERSION = 18.05.0

> >

> >  # Sources.

> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c diff --git

> > a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index

> > 64658bc0e..66c7d7993 100644

> > --- a/drivers/net/mlx5/mlx5_flow.c

> > +++ b/drivers/net/mlx5/mlx5_flow.c

> > @@ -113,6 +113,7 @@ enum hash_rxq_type {

> >  	HASH_RXQ_UDPV6,

> >  	HASH_RXQ_IPV6,

> >  	HASH_RXQ_ETH,

> > +	HASH_RXQ_TUNNEL,

> >  };

> >

> >  /* Initialization data for hash RX queue. */ @@ -451,6 +452,7 @@

> > struct mlx5_flow_parse {

> >  	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use.

> */

> >  	uint8_t rss_key[40]; /**< copy of the RSS key. */

> >  	enum hash_rxq_type layer; /**< Last pattern layer detected. */

> > +	enum hash_rxq_type out_layer; /**< Last outer pattern layer

> > +detected. */

> >  	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */

> >  	struct ibv_counter_set *cs; /**< Holds the counter set for the rule

> */

> >  	struct {

> > @@ -458,6 +460,7 @@ struct mlx5_flow_parse {

> >  		/**< Pointer to Verbs attributes. */

> >  		unsigned int offset;

> >  		/**< Current position or total size of the attribute. */

> > +		uint64_t hash_fields; /**< Verbs hash fields. */

> >  	} queue[RTE_DIM(hash_rxq_init)];

> >  };

> >

> > @@ -698,7 +701,8 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,

> >  						   " function is Toeplitz");

> >  				return -rte_errno;

> >  			}

> > -			if (rss->level) {

> > +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > +			if (parser->rss_conf.level > 0) {

> 

> According to Adrien's API level 0 means do whatever you want and 1 means

> outer.

> This is removing the outer RSS support.

> 

> >  				rte_flow_error_set(error, EINVAL,

> >  						   RTE_FLOW_ERROR_TYPE_ACTION,

> >  						   actions,

> > @@ -706,6 +710,15 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,

> >  						   " level is not supported");

> >  				return -rte_errno;

> >  			}

> > +#endif

> > +			if (parser->rss_conf.level > 1) {

> > +				rte_flow_error_set(error, EINVAL,

> > +						   RTE_FLOW_ERROR_TYPE_ACTION,

> > +						   actions,

> > +						   "RSS encapsulation level"

> > +						   " > 1 is not supported");

> > +				return -rte_errno;

> > +			}

> 

> Seems the levels are wrongly used.


Thanks, updated.

> 

> >  			if (rss->types & MLX5_RSS_HF_MASK) {

> >  				rte_flow_error_set(error, EINVAL,

> >  						   RTE_FLOW_ERROR_TYPE_ACTION,

> > @@ -756,7 +769,7 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,

> >  			}

> >  			parser->rss_conf = (struct rte_flow_action_rss){

> >  				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,

> > -				.level = 0,

> > +				.level = rss->level,

> >  				.types = rss->types,

> >  				.key_len = rss_key_len,

> >  				.queue_num = rss->queue_num,

> > @@ -842,11 +855,12 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,

> >   *   0 on success, a negative errno value otherwise and rte_errno is

> set.

> >   */

> >  static int

> > -mlx5_flow_convert_items_validate(struct rte_eth_dev *dev

> > __rte_unused,

> > +mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,

> >  				 const struct rte_flow_item items[],

> >  				 struct rte_flow_error *error,

> >  				 struct mlx5_flow_parse *parser)

> >  {

> > +	struct priv *priv = dev->data->dev_private;

> >  	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

> >  	unsigned int i;

> >  	int ret = 0;

> > @@ -886,6 +900,14 @@ mlx5_flow_convert_items_validate(struct rte_eth_dev

> *dev __rte_unused,

> >  						   " tunnel encapsulations.");

> >  				return -rte_errno;

> >  			}

> > +			if (!priv->config.tunnel_en &&

> > +			    parser->rss_conf.level) {

> > +				rte_flow_error_set(error, ENOTSUP,

> > +					RTE_FLOW_ERROR_TYPE_ITEM,

> > +					items,

> > +					"Tunnel offloading not enabled");

> 

> I would suggest "RSS on tunnel is not supported".


Thanks, updated.

> 

> > +				return -rte_errno;

> > +			}

> >  			parser->inner = IBV_FLOW_SPEC_INNER;

> >  			parser->tunnel = flow_ptype[items->type];

> >  		}

> > @@ -993,7 +1015,11 @@ static void

> >  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)  {

> >  	unsigned int i;

> > +	uint32_t inner = parser->inner;

> >

> > +	/* Don't create extra flows for outer RSS. */

> > +	if (parser->tunnel && !parser->rss_conf.level)

> > +		return;

> >  	/* Remove any other flow not matching the pattern. */

> >  	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {

> >  		for (i = 0; i != hash_rxq_init_n; ++i) { @@ -1014,23 +1040,25

> @@

> > mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)

> >  			struct ibv_flow_spec_ipv4_ext ipv4;

> >  			struct ibv_flow_spec_ipv6 ipv6;

> >  			struct ibv_flow_spec_tcp_udp udp_tcp;

> > +			struct ibv_flow_spec_eth eth;

> >  		} specs;

> >  		void *dst;

> >  		uint16_t size;

> >

> >  		if (i == parser->layer)

> >  			continue;

> > -		if (parser->layer == HASH_RXQ_ETH) {

> > +		if (parser->layer == HASH_RXQ_ETH ||

> > +		    parser->layer == HASH_RXQ_TUNNEL) {

> >  			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {

> >  				size = sizeof(struct ibv_flow_spec_ipv4_ext);

> >  				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){

> > -					.type = IBV_FLOW_SPEC_IPV4_EXT,

> > +					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,

> >  					.size = size,

> >  				};

> >  			} else {

> >  				size = sizeof(struct ibv_flow_spec_ipv6);

> >  				specs.ipv6 = (struct ibv_flow_spec_ipv6){

> > -					.type = IBV_FLOW_SPEC_IPV6,

> > +					.type = inner | IBV_FLOW_SPEC_IPV6,

> >  					.size = size,

> >  				};

> >  			}

> > @@ -1047,7 +1075,7 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse

> *parser)

> >  		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {

> >  			size = sizeof(struct ibv_flow_spec_tcp_udp);

> >  			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {

> > -				.type = ((i == HASH_RXQ_UDPV4 ||

> > +				.type = inner | ((i == HASH_RXQ_UDPV4 ||

> >  					  i == HASH_RXQ_UDPV6) ?

> >  					 IBV_FLOW_SPEC_UDP :

> >  					 IBV_FLOW_SPEC_TCP),

> > @@ -1068,6 +1096,8 @@ mlx5_flow_convert_finalise(struct

> > mlx5_flow_parse *parser)

> >  /**

> >   * Update flows according to pattern and RSS hash fields.

> >   *

> > + * @param dev

> > + *   Pointer to Ethernet device.

> >   * @param[in, out] parser

> >   *   Internal parser structure.

> >   *

> > @@ -1075,20 +1105,63 @@ mlx5_flow_convert_finalise(struct

> mlx5_flow_parse *parser)

> >   *   0 on success, a negative errno value otherwise and rte_errno is

> set.

> >   */

> >  static int

> > -mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)

> > +mlx5_flow_convert_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse

> > +*parser)

> >  {

> > -	const unsigned int ipv4 =

> > +	unsigned int ipv4 =

> >  		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;

> >  	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 :

> HASH_RXQ_TCPV6;

> >  	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

> >  	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 :

> HASH_RXQ_TCPV4;

> >  	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 :

> HASH_RXQ_IPV4;

> > -	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

> > +	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

> >  	unsigned int i;

> > +	int found = 0;

> >

> > -	if (parser->layer == HASH_RXQ_ETH)

> > +	/*

> > +	 * Outer RSS.

> > +	 * HASH_RXQ_ETH is the only rule since tunnel packet match this

> > +	 * rule must match outer pattern.

> > +	 */

> > +	if (parser->tunnel && !parser->rss_conf.level) {

> > +		/* Remove flows other than default. */

> > +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {

> > +			rte_free(parser->queue[i].ibv_attr);

> > +			parser->queue[i].ibv_attr = NULL;

> > +		}

> > +		ipv4 = hash_rxq_init[parser->out_layer].ip_version ==

> MLX5_IPV4;

> > +		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

> > +		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &

> > +		    parser->rss_conf.types) {

> > +			parser->queue[HASH_RXQ_ETH].hash_fields =

> > +				hash_rxq_init[parser->out_layer].hash_fields;

> > +		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &

> > +		    parser->rss_conf.types)) {

> > +			parser->queue[HASH_RXQ_ETH].hash_fields =

> > +				hash_rxq_init[ip].hash_fields;

> > +		} else if (parser->rss_conf.types) {

> > +			DRV_LOG(WARNING,

> > +				"port %u rss outer hash function doesn't match"

> > +				" pattern", dev->data->port_id);

> 

> Hash function, what do you mean?  It seems to be the layers on which

> the RSS is configured that do not match the pattern.

> 

> Honestly, if I saw such a warning I would seriously doubt that the

> rule has been taken and applied.

> "port 0 rss outer hash function doesn't match pattern" --> what will

> happen to the packets matching such flow?  Will they be dropped?

> This is not helping at all, so please remove it.

> 

> > +		}

> > +		return 0;

> > +	}

> > +	if (parser->layer == HASH_RXQ_ETH || parser->layer ==

> HASH_RXQ_TUNNEL) {

> > +		/* Remove unused flows according to hash function. */

> > +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {

> > +			if (!parser->queue[i].ibv_attr)

> > +				continue;

> > +			if (hash_rxq_init[i].dpdk_rss_hf &

> > +			    parser->rss_conf.types) {

> > +				parser->queue[i].hash_fields =

> > +					hash_rxq_init[i].hash_fields;

> > +				continue;

> > +			}

> > +			rte_free(parser->queue[i].ibv_attr);

> > +			parser->queue[i].ibv_attr = NULL;

> > +		}

> >  		return 0;

> > -	/* This layer becomes useless as the pattern define under layers. */

> > +	}

> > +	/* Remove ETH layer flow. */

> >  	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);

> >  	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;

> >  	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4.

> > */ @@ -1098,9 +1171,52 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse

> *parser)

> >  		rte_free(parser->queue[i].ibv_attr);

> >  		parser->queue[i].ibv_attr = NULL;

> >  	}

> > -	/* Remove impossible flow according to the RSS configuration. */

> > -	if (hash_rxq_init[parser->layer].dpdk_rss_hf &

> > -	    parser->rss_conf.types) {

> > +	/*

> > +	 * Keep L4 flows as IP pattern has to support L4 RSS.

> > +	 * Otherwise, only keep the flow that match the pattern.

> > +	 */

> 

> This comment is not clear, please re-word it.

> 

> > +	if (parser->layer != ip) {

> > +		/* Only keep the flow that match the pattern. */

> > +		for (i = hmin; i != (hmax + 1); ++i) {

> > +			if (i == parser->layer)

> > +				continue;

> > +			rte_free(parser->queue[i].ibv_attr);

> > +			parser->queue[i].ibv_attr = NULL;

> > +		}

> > +	}

> > +	if (parser->rss_conf.types) {

> > +		/* Remove impossible flow according to the RSS configuration.

> */

> > +		for (i = hmin; i != (hmax + 1); ++i) {

> > +			if (!parser->queue[i].ibv_attr)

> > +				continue;

> > +			if (parser->rss_conf.types &

> > +			    hash_rxq_init[i].dpdk_rss_hf) {

> > +				parser->queue[i].hash_fields =

> > +					hash_rxq_init[i].hash_fields;

> > +				found = 1;

> > +				continue;

> > +			}

> > +			/* L4 flow could be used for L3 RSS. */

> > +			if (i == parser->layer && i < ip &&

> > +			    (hash_rxq_init[ip].dpdk_rss_hf &

> > +			     parser->rss_conf.types)) {

> > +				parser->queue[i].hash_fields =

> > +					hash_rxq_init[ip].hash_fields;

> > +				found = 1;

> > +				continue;

> > +			}

> > +			/* L3 flow and L4 hash: non-rss L3 flow. */

> > +			if (i == parser->layer && i == ip && found)

> > +				/* IP pattern and L4 HF. */

> > +				continue;

> > +			rte_free(parser->queue[i].ibv_attr);

> > +			parser->queue[i].ibv_attr = NULL;

> > +		}

> > +		if (!found)

> > +			DRV_LOG(WARNING,

> > +				"port %u rss hash function doesn't match "

> > +				"pattern", dev->data->port_id);

> 

> Ditto.

> 

> > +	} else {

> >  		/* Remove any other flow. */

> >  		for (i = hmin; i != (hmax + 1); ++i) {

> >  			if (i == parser->layer || !parser->queue[i].ibv_attr) @@

> -1108,8

> > +1224,6 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)

> >  			rte_free(parser->queue[i].ibv_attr);

> >  			parser->queue[i].ibv_attr = NULL;

> >  		}

> > -	} else if (!parser->queue[ip].ibv_attr) {

> > -		/* no RSS possible with the current configuration. */

> >  		parser->rss_conf.queue_num = 1;

> >  	}

> >  	return 0;

> > @@ -1179,10 +1293,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,

> >  		for (i = 0; i != hash_rxq_init_n; ++i) {

> >  			unsigned int offset;

> >

> > -			if (!(parser->rss_conf.types &

> > -			      hash_rxq_init[i].dpdk_rss_hf) &&

> > -			    (i != HASH_RXQ_ETH))

> > -				continue;

> >  			offset = parser->queue[i].offset;

> >  			parser->queue[i].ibv_attr =

> >  				mlx5_flow_convert_allocate(offset, error); @@ -

> 1194,6 +1304,7 @@

> > mlx5_flow_convert(struct rte_eth_dev *dev,

> >  	/* Third step. Conversion parse, fill the specifications. */

> >  	parser->inner = 0;

> >  	parser->tunnel = 0;

> > +	parser->layer = HASH_RXQ_ETH;

> >  	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {

> >  		struct mlx5_flow_data data = {

> >  			.parser = parser,

> > @@ -1211,23 +1322,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,

> >  		if (ret)

> >  			goto exit_free;

> >  	}

> > -	if (parser->mark)

> > -		mlx5_flow_create_flag_mark(parser, parser->mark_id);

> > -	if (parser->count && parser->create) {

> > -		mlx5_flow_create_count(dev, parser);

> > -		if (!parser->cs)

> > -			goto exit_count_error;

> > -	}

> >  	/*

> >  	 * Last step. Complete missing specification to reach the RSS

> >  	 * configuration.

> >  	 */

> >  	if (!parser->drop)

> > -		ret = mlx5_flow_convert_rss(parser);

> > +		ret = mlx5_flow_convert_rss(dev, parser);

> >  		if (ret)

> >  			goto exit_free;

> >  		mlx5_flow_convert_finalise(parser);

> >  	mlx5_flow_update_priority(dev, parser, attr);

> > +	if (parser->mark)

> > +		mlx5_flow_create_flag_mark(parser, parser->mark_id);

> > +	if (parser->count && parser->create) {

> > +		mlx5_flow_create_count(dev, parser);

> > +		if (!parser->cs)

> > +			goto exit_count_error;

> > +	}

> 

> Why do you need to move this code?


To avoid leaking the counter resource if anything goes wrong in
mlx5_flow_convert_rss().

> 

> >  exit_free:

> >  	/* Only verification is expected, all resources should be released.

> */

> >  	if (!parser->create) {

> > @@ -1275,17 +1386,11 @@ mlx5_flow_create_copy(struct mlx5_flow_parse

> *parser, void *src,

> >  	for (i = 0; i != hash_rxq_init_n; ++i) {

> >  		if (!parser->queue[i].ibv_attr)

> >  			continue;

> > -		/* Specification must be the same l3 type or none. */

> > -		if (parser->layer == HASH_RXQ_ETH ||

> > -		    (hash_rxq_init[parser->layer].ip_version ==

> > -		     hash_rxq_init[i].ip_version) ||

> > -		    (hash_rxq_init[i].ip_version == 0)) {

> > -			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +

> > -					parser->queue[i].offset);

> > -			memcpy(dst, src, size);

> > -			++parser->queue[i].ibv_attr->num_of_specs;

> > -			parser->queue[i].offset += size;

> > -		}

> > +		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +

> > +				parser->queue[i].offset);

> > +		memcpy(dst, src, size);

> > +		++parser->queue[i].ibv_attr->num_of_specs;

> > +		parser->queue[i].offset += size;

> >  	}

> >  }

> >

> > @@ -1316,9 +1421,7 @@ mlx5_flow_create_eth(const struct rte_flow_item

> *item,

> >  		.size = eth_size,

> >  	};

> >

> > -	/* Don't update layer for the inner pattern. */

> > -	if (!parser->inner)

> > -		parser->layer = HASH_RXQ_ETH;

> > +	parser->layer = HASH_RXQ_ETH;

> >  	if (spec) {

> >  		unsigned int i;

> >

> > @@ -1431,9 +1534,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item

> *item,

> >  		.size = ipv4_size,

> >  	};

> >

> > -	/* Don't update layer for the inner pattern. */

> > -	if (!parser->inner)

> > -		parser->layer = HASH_RXQ_IPV4;

> > +	parser->layer = HASH_RXQ_IPV4;

> >  	if (spec) {

> >  		if (!mask)

> >  			mask = default_mask;

> > @@ -1486,9 +1587,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item

> *item,

> >  		.size = ipv6_size,

> >  	};

> >

> > -	/* Don't update layer for the inner pattern. */

> > -	if (!parser->inner)

> > -		parser->layer = HASH_RXQ_IPV6;

> > +	parser->layer = HASH_RXQ_IPV6;

> >  	if (spec) {

> >  		unsigned int i;

> >  		uint32_t vtc_flow_val;

> > @@ -1561,13 +1660,10 @@ mlx5_flow_create_udp(const struct rte_flow_item

> *item,

> >  		.size = udp_size,

> >  	};

> >

> > -	/* Don't update layer for the inner pattern. */

> > -	if (!parser->inner) {

> > -		if (parser->layer == HASH_RXQ_IPV4)

> > -			parser->layer = HASH_RXQ_UDPV4;

> > -		else

> > -			parser->layer = HASH_RXQ_UDPV6;

> > -	}

> > +	if (parser->layer == HASH_RXQ_IPV4)

> > +		parser->layer = HASH_RXQ_UDPV4;

> > +	else

> > +		parser->layer = HASH_RXQ_UDPV6;

> >  	if (spec) {

> >  		if (!mask)

> >  			mask = default_mask;

> > @@ -1610,13 +1706,10 @@ mlx5_flow_create_tcp(const struct rte_flow_item

> *item,

> >  		.size = tcp_size,

> >  	};

> >

> > -	/* Don't update layer for the inner pattern. */

> > -	if (!parser->inner) {

> > -		if (parser->layer == HASH_RXQ_IPV4)

> > -			parser->layer = HASH_RXQ_TCPV4;

> > -		else

> > -			parser->layer = HASH_RXQ_TCPV6;

> > -	}

> > +	if (parser->layer == HASH_RXQ_IPV4)

> > +		parser->layer = HASH_RXQ_TCPV4;

> > +	else

> > +		parser->layer = HASH_RXQ_TCPV6;

> >  	if (spec) {

> >  		if (!mask)

> >  			mask = default_mask;

> > @@ -1666,6 +1759,8 @@ mlx5_flow_create_vxlan(const struct rte_flow_item

> *item,

> >  	id.vni[0] = 0;

> >  	parser->inner = IBV_FLOW_SPEC_INNER;

> >  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];

> > +	parser->out_layer = parser->layer;

> > +	parser->layer = HASH_RXQ_TUNNEL;

> >  	if (spec) {

> >  		if (!mask)

> >  			mask = default_mask;

> > @@ -1720,6 +1815,8 @@ mlx5_flow_create_gre(const struct rte_flow_item

> > *item __rte_unused,

> >

> >  	parser->inner = IBV_FLOW_SPEC_INNER;

> >  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];

> > +	parser->out_layer = parser->layer;

> > +	parser->layer = HASH_RXQ_TUNNEL;

> >  	mlx5_flow_create_copy(parser, &tunnel, size);

> >  	return 0;

> >  }

> > @@ -1883,33 +1980,33 @@ mlx5_flow_create_action_queue_rss(struct

> rte_eth_dev *dev,

> >  	unsigned int i;

> >

> >  	for (i = 0; i != hash_rxq_init_n; ++i) {

> > -		uint64_t hash_fields;

> > -

> >  		if (!parser->queue[i].ibv_attr)

> >  			continue;

> >  		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;

> >  		parser->queue[i].ibv_attr = NULL;

> > -		hash_fields = hash_rxq_init[i].hash_fields;

> > +		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;

> >  		if (!priv->dev->data->dev_started)

> >  			continue;

> >  		flow->frxq[i].hrxq =

> >  			mlx5_hrxq_get(dev,

> >  				      parser->rss_conf.key,

> >  				      parser->rss_conf.key_len,

> > -				      hash_fields,

> > +				      flow->frxq[i].hash_fields,

> >  				      parser->rss_conf.queue,

> >  				      parser->rss_conf.queue_num,

> > -				      parser->tunnel);

> > +				      parser->tunnel,

> > +				      parser->rss_conf.level);

> >  		if (flow->frxq[i].hrxq)

> >  			continue;

> >  		flow->frxq[i].hrxq =

> >  			mlx5_hrxq_new(dev,

> >  				      parser->rss_conf.key,

> >  				      parser->rss_conf.key_len,

> > -				      hash_fields,

> > +				      flow->frxq[i].hash_fields,

> >  				      parser->rss_conf.queue,

> >  				      parser->rss_conf.queue_num,

> > -				      parser->tunnel);

> > +				      parser->tunnel,

> > +				      parser->rss_conf.level);

> >  		if (!flow->frxq[i].hrxq) {

> >  			return rte_flow_error_set(error, ENOMEM,

> >  						  RTE_FLOW_ERROR_TYPE_HANDLE,

> > @@ -2006,7 +2103,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev

> *dev,

> >  		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",

> >  			dev->data->port_id,

> >  			(void *)flow, i,

> > -			(void *)flow->frxq[i].hrxq,

> > +			(void *)flow->frxq[i].hrxq->qp,

> >  			(void *)flow->frxq[i].ibv_flow);

> >  	}

> >  	if (!flows_n) {

> > @@ -2532,19 +2629,21 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct

> mlx5_flows *list)

> >  			flow->frxq[i].hrxq =

> >  				mlx5_hrxq_get(dev, flow->rss_conf.key,

> >  					      flow->rss_conf.key_len,

> > -					      hash_rxq_init[i].hash_fields,

> > +					      flow->frxq[i].hash_fields,

> >  					      flow->rss_conf.queue,

> >  					      flow->rss_conf.queue_num,

> > -					      flow->tunnel);

> > +					      flow->tunnel,

> > +					      flow->rss_conf.level);

> >  			if (flow->frxq[i].hrxq)

> >  				goto flow_create;

> >  			flow->frxq[i].hrxq =

> >  				mlx5_hrxq_new(dev, flow->rss_conf.key,

> >  					      flow->rss_conf.key_len,

> > -					      hash_rxq_init[i].hash_fields,

> > +					      flow->frxq[i].hash_fields,

> >  					      flow->rss_conf.queue,

> >  					      flow->rss_conf.queue_num,

> > -					      flow->tunnel);

> > +					      flow->tunnel,

> > +					      flow->rss_conf.level);

> >  			if (!flow->frxq[i].hrxq) {

> >  				DRV_LOG(DEBUG,

> >  					"port %u flow %p cannot be applied", diff --

> git

> > a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c index

> > be684d378..6874aa32a 100644

> > --- a/drivers/net/mlx5/mlx5_glue.c

> > +++ b/drivers/net/mlx5/mlx5_glue.c

> > @@ -313,6 +313,21 @@ mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj,

> uint64_t obj_type)

> >  	return mlx5dv_init_obj(obj, obj_type);  }

> >

> > +static struct ibv_qp *

> > +mlx5_glue_dv_create_qp(struct ibv_context *context,

> > +		       struct ibv_qp_init_attr_ex *qp_init_attr_ex,

> > +		       struct mlx5dv_qp_init_attr *dv_qp_init_attr) { #ifdef

> > +HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > +	return mlx5dv_create_qp(context, qp_init_attr_ex, dv_qp_init_attr);

> > +#else

> > +	(void)context;

> > +	(void)qp_init_attr_ex;

> > +	(void)dv_qp_init_attr;

> > +	return NULL;

> > +#endif

> > +}

> > +

> >  const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){

> >  	.version = MLX5_GLUE_VERSION,

> >  	.fork_init = mlx5_glue_fork_init,

> > @@ -356,4 +371,5 @@ const struct mlx5_glue *mlx5_glue = &(const struct

> mlx5_glue){

> >  	.dv_query_device = mlx5_glue_dv_query_device,

> >  	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,

> >  	.dv_init_obj = mlx5_glue_dv_init_obj,

> > +	.dv_create_qp = mlx5_glue_dv_create_qp,

> >  };

> > diff --git a/drivers/net/mlx5/mlx5_glue.h

> > b/drivers/net/mlx5/mlx5_glue.h index b5efee3b6..841363872 100644

> > --- a/drivers/net/mlx5/mlx5_glue.h

> > +++ b/drivers/net/mlx5/mlx5_glue.h

> > @@ -31,6 +31,10 @@ struct ibv_counter_set_init_attr;  struct

> > ibv_query_counter_set_attr;  #endif

> >

> > +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT struct mlx5dv_qp_init_attr;

> > +#endif

> > +

> >  /* LIB_GLUE_VERSION must be updated every time this structure is

> > modified. */  struct mlx5_glue {

> >  	const char *version;

> > @@ -106,6 +110,10 @@ struct mlx5_glue {

> >  				   enum mlx5dv_set_ctx_attr_type type,

> >  				   void *attr);

> >  	int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);

> > +	struct ibv_qp *(*dv_create_qp)

> > +		(struct ibv_context *context,

> > +		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,

> > +		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);

> >  };

> >

> >  const struct mlx5_glue *mlx5_glue;

> > diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c

> > index 073732e16..6e5565fb2 100644

> > --- a/drivers/net/mlx5/mlx5_rxq.c

> > +++ b/drivers/net/mlx5/mlx5_rxq.c

> > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)

> >   *   Number of queues.

> >   * @param tunnel

> >   *   Tunnel type.

> > + * @param rss_level

> > + *   RSS hash on tunnel level.

> >   *

> >   * @return

> >   *   The Verbs object initialised, NULL otherwise and rte_errno is set.

> > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *  mlx5_hrxq_new(struct

> > rte_eth_dev *dev,

> >  	      const uint8_t *rss_key, uint32_t rss_key_len,

> >  	      uint64_t hash_fields,

> > -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)

> > +	      const uint16_t *queues, uint32_t queues_n,

> > +	      uint32_t tunnel, uint32_t rss_level)

> 

> tunnel and rss_level seems to be redundant here.

> 

> rss_level > 1 is equivalent to tunnel, there is no need to have both.


There is a case of tunnel and outer rss(1).

> 

> >  {

> >  	struct priv *priv = dev->data->dev_private;

> >  	struct mlx5_hrxq *hrxq;

> >  	struct mlx5_ind_table_ibv *ind_tbl;

> >  	struct ibv_qp *qp;

> >  	int err;

> > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > +	struct mlx5dv_qp_init_attr qp_init_attr = {0}; #endif

> >

> >  	queues_n = hash_fields ? queues_n : 1;

> >  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); @@ -1410,6

> > +1416,33 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,

> >  		rte_errno = ENOMEM;

> >  		return NULL;

> >  	}

> > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > +	if (tunnel) {

> > +		qp_init_attr.comp_mask =

> > +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;

> > +		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;

> > +	}

> > +	qp = mlx5_glue->dv_create_qp(

> > +		priv->ctx,

> > +		&(struct ibv_qp_init_attr_ex){

> > +			.qp_type = IBV_QPT_RAW_PACKET,

> > +			.comp_mask =

> > +				IBV_QP_INIT_ATTR_PD |

> > +				IBV_QP_INIT_ATTR_IND_TABLE |

> > +				IBV_QP_INIT_ATTR_RX_HASH,

> > +			.rx_hash_conf = (struct ibv_rx_hash_conf){

> > +				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,

> > +				.rx_hash_key_len = rss_key_len,

> > +				.rx_hash_key = (void *)(uintptr_t)rss_key,

> > +				.rx_hash_fields_mask = hash_fields |

> > +					(tunnel && rss_level ?

> > +					(uint32_t)IBV_RX_HASH_INNER : 0),

> > +			},

> > +			.rwq_ind_tbl = ind_tbl->ind_table,

> > +			.pd = priv->pd,

> > +		},

> > +		&qp_init_attr);

> > +#else

> >  	qp = mlx5_glue->create_qp_ex

> >  		(priv->ctx,

> >  		 &(struct ibv_qp_init_attr_ex){

> > @@ -1427,6 +1460,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,

> >  			.rwq_ind_tbl = ind_tbl->ind_table,

> >  			.pd = priv->pd,

> >  		 });

> > +#endif

> >  	if (!qp) {

> >  		rte_errno = errno;

> >  		goto error;

> > @@ -1439,6 +1473,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,

> >  	hrxq->rss_key_len = rss_key_len;

> >  	hrxq->hash_fields = hash_fields;

> >  	hrxq->tunnel = tunnel;

> > +	hrxq->rss_level = rss_level;

> >  	memcpy(hrxq->rss_key, rss_key, rss_key_len);

> >  	rte_atomic32_inc(&hrxq->refcnt);

> >  	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); @@ -1448,6 +1483,8 @@

> > mlx5_hrxq_new(struct rte_eth_dev *dev,

> >  	return hrxq;

> >  error:

> >  	err = rte_errno; /* Save rte_errno before cleanup. */

> > +	DRV_LOG(ERR, "port %u: Error creating Hash Rx queue",

> > +		dev->data->port_id);

> 

> Developer log, please remove it, for the user the flow won't be created

> with the correct error reported.


Removed; there is already a log on the caller side.

> 

> >  	mlx5_ind_table_ibv_release(dev, ind_tbl);

> >  	if (qp)

> >  		claim_zero(mlx5_glue->destroy_qp(qp));

> > @@ -1469,6 +1506,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,

> >   *   Number of queues.

> >   * @param tunnel

> >   *   Tunnel type.

> > + * @param rss_level

> > + *   RSS hash on tunnel level

> >   *

> >   * @return

> >   *   An hash Rx queue on success.

> > @@ -1477,7 +1516,8 @@ struct mlx5_hrxq *  mlx5_hrxq_get(struct

> > rte_eth_dev *dev,

> >  	      const uint8_t *rss_key, uint32_t rss_key_len,

> >  	      uint64_t hash_fields,

> > -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)

> > +	      const uint16_t *queues, uint32_t queues_n,

> > +	      uint32_t tunnel, uint32_t rss_level)

> 

> Ditto.

> 

> >  {

> >  	struct priv *priv = dev->data->dev_private;

> >  	struct mlx5_hrxq *hrxq;

> > @@ -1494,6 +1534,8 @@ mlx5_hrxq_get(struct rte_eth_dev *dev,

> >  			continue;

> >  		if (hrxq->tunnel != tunnel)

> >  			continue;

> > +		if (hrxq->rss_level != rss_level)

> > +			continue;

> >  		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);

> >  		if (!ind_tbl)

> >  			continue;

> > diff --git a/drivers/net/mlx5/mlx5_rxtx.h

> > b/drivers/net/mlx5/mlx5_rxtx.h index d35605b55..62cf55109 100644

> > --- a/drivers/net/mlx5/mlx5_rxtx.h

> > +++ b/drivers/net/mlx5/mlx5_rxtx.h

> > @@ -147,6 +147,7 @@ struct mlx5_hrxq {

> >  	struct ibv_qp *qp; /* Verbs queue pair. */

> >  	uint64_t hash_fields; /* Verbs Hash fields. */

> >  	uint32_t tunnel; /* Tunnel type. */

> > +	uint32_t rss_level; /* RSS on tunnel level. */

> >  	uint32_t rss_key_len; /* Hash key length in bytes. */

> >  	uint8_t rss_key[]; /* Hash key. */

> >  };

> > @@ -251,12 +252,12 @@ struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev

> *dev,

> >  				const uint8_t *rss_key, uint32_t rss_key_len,

> >  				uint64_t hash_fields,

> >  				const uint16_t *queues, uint32_t queues_n,

> > -				uint32_t tunnel);

> > +				uint32_t tunnel, uint32_t rss_level);

> >  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,

> >  				const uint8_t *rss_key, uint32_t rss_key_len,

> >  				uint64_t hash_fields,

> >  				const uint16_t *queues, uint32_t queues_n,

> > -				uint32_t tunnel);

> > +				uint32_t tunnel, uint32_t rss_level);

> >  int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq

> > *hxrq);  int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);  uint64_t

> > mlx5_get_rx_port_offloads(void);

> > --

> > 2.13.3

> >

> 

> Thanks,

> 

> --

> Nélio Laranjeiro

> 6WIND
  
Nélio Laranjeiro April 16, 2018, 7:14 a.m. UTC | #3
On Sat, Apr 14, 2018 at 12:25:12PM +0000, Xueming(Steven) Li wrote:
>[...]
> > > @@ -1211,23 +1322,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > >  		if (ret)
> > >  			goto exit_free;
> > >  	}
> > > -	if (parser->mark)
> > > -		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> > > -	if (parser->count && parser->create) {
> > > -		mlx5_flow_create_count(dev, parser);
> > > -		if (!parser->cs)
> > > -			goto exit_count_error;
> > > -	}
> > >  	/*
> > >  	 * Last step. Complete missing specification to reach the RSS
> > >  	 * configuration.
> > >  	 */
> > >  	if (!parser->drop)
> > > -		ret = mlx5_flow_convert_rss(parser);
> > > +		ret = mlx5_flow_convert_rss(dev, parser);
> > >  		if (ret)
> > >  			goto exit_free;
> > >  		mlx5_flow_convert_finalise(parser);
> > >  	mlx5_flow_update_priority(dev, parser, attr);
> > > +	if (parser->mark)
> > > +		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> > > +	if (parser->count && parser->create) {
> > > +		mlx5_flow_create_count(dev, parser);
> > > +		if (!parser->cs)
> > > +			goto exit_count_error;
> > > +	}
> > 
> > Why do you need to move this code?
> 
> To avoid leaking the counter resource if anything goes wrong in
> mlx5_flow_convert_rss().

Why is this modification addressed in this patch? Why shouldn't it be in
the patch introducing mlx5_flow_convert_rss()?

>[...]
> > > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
> > >   *   Number of queues.
> > >   * @param tunnel
> > >   *   Tunnel type.
> > > + * @param rss_level
> > > + *   RSS hash on tunnel level.
> > >   *
> > >   * @return
> > >   *   The Verbs object initialised, NULL otherwise and rte_errno is set.
> > > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *  mlx5_hrxq_new(struct
> > > rte_eth_dev *dev,
> > >  	      const uint8_t *rss_key, uint32_t rss_key_len,
> > >  	      uint64_t hash_fields,
> > > -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> > > +	      const uint16_t *queues, uint32_t queues_n,
> > > +	      uint32_t tunnel, uint32_t rss_level)
> > 
> > tunnel and rss_level seems to be redundant here.
> > 
> > rss_level > 1 is equivalent to tunnel, there is no need to have both.
> 
> There is a case of tunnel and outer rss(1).

Why can't it be handled by a regular hash Rx queue, i.e. what is the
benefit of creating a tunnel hash Rx queue to do the same job as a
legacy one?

See below,

> > >  {
> > >  	struct priv *priv = dev->data->dev_private;
> > >  	struct mlx5_hrxq *hrxq;
> > >  	struct mlx5_ind_table_ibv *ind_tbl;
> > >  	struct ibv_qp *qp;
> > >  	int err;
> > > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > > +	struct mlx5dv_qp_init_attr qp_init_attr = {0}; #endif
> > >
> > >  	queues_n = hash_fields ? queues_n : 1;
> > >  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); @@ -1410,6
> > > +1416,33 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
> > >  		rte_errno = ENOMEM;
> > >  		return NULL;
> > >  	}
> > > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > > +	if (tunnel) {

Why not: if (rss_level > 1) ?

> > > +		qp_init_attr.comp_mask =
> > > +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
> > > +		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
> > > +	}
> > > +	qp = mlx5_glue->dv_create_qp(
> > > +		priv->ctx,
> > > +		&(struct ibv_qp_init_attr_ex){
> > > +			.qp_type = IBV_QPT_RAW_PACKET,
> > > +			.comp_mask =
> > > +				IBV_QP_INIT_ATTR_PD |
> > > +				IBV_QP_INIT_ATTR_IND_TABLE |
> > > +				IBV_QP_INIT_ATTR_RX_HASH,
> > > +			.rx_hash_conf = (struct ibv_rx_hash_conf){
> > > +				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> > > +				.rx_hash_key_len = rss_key_len,
> > > +				.rx_hash_key = (void *)(uintptr_t)rss_key,
> > > +				.rx_hash_fields_mask = hash_fields |
> > > +					(tunnel && rss_level ?
> > > +					(uint32_t)IBV_RX_HASH_INNER : 0),
>[...]

 .rx_hash_fields_mask = hash_fields |
 ((rss_level > 1) ?
 (uint32_t)IBV_RX_HASH_INNER : 0),

Thanks,
  
Xueming Li April 16, 2018, 7:46 a.m. UTC | #4
> -----Original Message-----

> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>

> Sent: Monday, April 16, 2018 3:14 PM

> To: Xueming(Steven) Li <xuemingl@mellanox.com>

> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org

> Subject: Re: [PATCH v2 07/15] net/mlx5: support tunnel RSS level

> 

> On Sat, Apr 14, 2018 at 12:25:12PM +0000, Xueming(Steven) Li wrote:

> >[...]

> > > > @@ -1211,23 +1322,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,

> > > >  		if (ret)

> > > >  			goto exit_free;

> > > >  	}

> > > > -	if (parser->mark)

> > > > -		mlx5_flow_create_flag_mark(parser, parser->mark_id);

> > > > -	if (parser->count && parser->create) {

> > > > -		mlx5_flow_create_count(dev, parser);

> > > > -		if (!parser->cs)

> > > > -			goto exit_count_error;

> > > > -	}

> > > >  	/*

> > > >  	 * Last step. Complete missing specification to reach the RSS

> > > >  	 * configuration.

> > > >  	 */

> > > >  	if (!parser->drop)

> > > > -		ret = mlx5_flow_convert_rss(parser);

> > > > +		ret = mlx5_flow_convert_rss(dev, parser);

> > > >  		if (ret)

> > > >  			goto exit_free;

> > > >  		mlx5_flow_convert_finalise(parser);

> > > >  	mlx5_flow_update_priority(dev, parser, attr);

> > > > +	if (parser->mark)

> > > > +		mlx5_flow_create_flag_mark(parser, parser->mark_id);

> > > > +	if (parser->count && parser->create) {

> > > > +		mlx5_flow_create_count(dev, parser);

> > > > +		if (!parser->cs)

> > > > +			goto exit_count_error;

> > > > +	}

> > >

> > > Why do you need to move this code?

> >

> > To avoid counter resource missing if anything wrong in function

> > mlx5_flow_convert_rss().

> 

> Why this modification is addressed in this patch, why should it it be in

> the patch introducing the mlx5_flow_convert_rss()?


Good catch, I'll update.
> 

> >[...]

> > > > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev

> *dev)

> > > >   *   Number of queues.

> > > >   * @param tunnel

> > > >   *   Tunnel type.

> > > > + * @param rss_level

> > > > + *   RSS hash on tunnel level.

> > > >   *

> > > >   * @return

> > > >   *   The Verbs object initialised, NULL otherwise and rte_errno is

> set.

> > > > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *  mlx5_hrxq_new(struct

> > > > rte_eth_dev *dev,

> > > >  	      const uint8_t *rss_key, uint32_t rss_key_len,

> > > >  	      uint64_t hash_fields,

> > > > -	      const uint16_t *queues, uint32_t queues_n, uint32_t

> tunnel)

> > > > +	      const uint16_t *queues, uint32_t queues_n,

> > > > +	      uint32_t tunnel, uint32_t rss_level)

> > >

> > > tunnel and rss_level seems to be redundant here.

> > >

> > > rss_level > 1 is equivalent to tunnel, there is no need to have both.

> >

> > There is a case of tunnel and outer rss(1).

> 

> Why cannot it be handled by a regular Hash Rx queue, i.e. what is the

> benefit of creating a tunnel hash Rx queue to make the same job as a

> legacy one?


Tunnel checksum, ptype and rss offloading demand a QP to be created by DV api with
tunnel offload flags.

> 

> See below,

> 

> > > >  {

> > > >  	struct priv *priv = dev->data->dev_private;

> > > >  	struct mlx5_hrxq *hrxq;

> > > >  	struct mlx5_ind_table_ibv *ind_tbl;

> > > >  	struct ibv_qp *qp;

> > > >  	int err;

> > > > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > > > +	struct mlx5dv_qp_init_attr qp_init_attr = {0}; #endif

> > > >

> > > >  	queues_n = hash_fields ? queues_n : 1;

> > > >  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); @@

> > > > -1410,6

> > > > +1416,33 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,

> > > >  		rte_errno = ENOMEM;

> > > >  		return NULL;

> > > >  	}

> > > > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT

> > > > +	if (tunnel) {

> 

> Why not: if (rss_level > 1) ?


Besides RSS, ptype and checksum also have to take advantage of tunnel offloading.

> 

> > > > +		qp_init_attr.comp_mask =

> > > > +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;

> > > > +		qp_init_attr.create_flags =

> MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;

> > > > +	}

> > > > +	qp = mlx5_glue->dv_create_qp(

> > > > +		priv->ctx,

> > > > +		&(struct ibv_qp_init_attr_ex){

> > > > +			.qp_type = IBV_QPT_RAW_PACKET,

> > > > +			.comp_mask =

> > > > +				IBV_QP_INIT_ATTR_PD |

> > > > +				IBV_QP_INIT_ATTR_IND_TABLE |

> > > > +				IBV_QP_INIT_ATTR_RX_HASH,

> > > > +			.rx_hash_conf = (struct ibv_rx_hash_conf){

> > > > +				.rx_hash_function =

> IBV_RX_HASH_FUNC_TOEPLITZ,

> > > > +				.rx_hash_key_len = rss_key_len,

> > > > +				.rx_hash_key = (void *)(uintptr_t)rss_key,

> > > > +				.rx_hash_fields_mask = hash_fields |

> > > > +					(tunnel && rss_level ?

> > > > +					(uint32_t)IBV_RX_HASH_INNER : 0),

> >[...]

> 

>  .rx_hash_fields_mask = hash_fields |

>  (rss_level > 1) ?

>  (uint32_t)IBV_RX_HASH_INNER : 0),


Thanks, rss_level has been fixed according to the new rule.

> 

> Thanks,

> 

> --

> Nélio Laranjeiro

> 6WIND
  
Nélio Laranjeiro April 16, 2018, 8:09 a.m. UTC | #5
On Mon, Apr 16, 2018 at 07:46:08AM +0000, Xueming(Steven) Li wrote:
>[...]
> > > > > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev
> > *dev)
> > > > >   *   Number of queues.
> > > > >   * @param tunnel
> > > > >   *   Tunnel type.
> > > > > + * @param rss_level
> > > > > + *   RSS hash on tunnel level.
> > > > >   *
> > > > >   * @return
> > > > >   *   The Verbs object initialised, NULL otherwise and rte_errno is
> > set.
> > > > > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *  mlx5_hrxq_new(struct
> > > > > rte_eth_dev *dev,
> > > > >  	      const uint8_t *rss_key, uint32_t rss_key_len,
> > > > >  	      uint64_t hash_fields,
> > > > > -	      const uint16_t *queues, uint32_t queues_n, uint32_t
> > tunnel)
> > > > > +	      const uint16_t *queues, uint32_t queues_n,
> > > > > +	      uint32_t tunnel, uint32_t rss_level)
> > > >
> > > > tunnel and rss_level seems to be redundant here.
> > > >
> > > > rss_level > 1 is equivalent to tunnel, there is no need to have both.
> > >
> > > There is a case of tunnel and outer rss(1).
> > 
> > Why cannot it be handled by a regular Hash Rx queue, i.e. what is the
> > benefit of creating a tunnel hash Rx queue to make the same job as a
> > legacy one?
> 
> Tunnel checksum, ptype and rss offloading demand a QP to be created by DV api with
> tunnel offload flags.

I was expecting such an answer; this information should be present in the
function documentation. Can you add it?

Thanks,
  
Xueming Li April 16, 2018, 10:06 a.m. UTC | #6
> -----Original Message-----

> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>

> Sent: Monday, April 16, 2018 4:09 PM

> To: Xueming(Steven) Li <xuemingl@mellanox.com>

> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org

> Subject: Re: [PATCH v2 07/15] net/mlx5: support tunnel RSS level

> 

> On Mon, Apr 16, 2018 at 07:46:08AM +0000, Xueming(Steven) Li wrote:

> >[...]

> > > > > > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct

> > > > > > rte_eth_dev

> > > *dev)

> > > > > >   *   Number of queues.

> > > > > >   * @param tunnel

> > > > > >   *   Tunnel type.

> > > > > > + * @param rss_level

> > > > > > + *   RSS hash on tunnel level.

> > > > > >   *

> > > > > >   * @return

> > > > > >   *   The Verbs object initialised, NULL otherwise and rte_errno

> is

> > > set.

> > > > > > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *

> > > > > > mlx5_hrxq_new(struct rte_eth_dev *dev,

> > > > > >  	      const uint8_t *rss_key, uint32_t rss_key_len,

> > > > > >  	      uint64_t hash_fields,

> > > > > > -	      const uint16_t *queues, uint32_t queues_n, uint32_t

> > > tunnel)

> > > > > > +	      const uint16_t *queues, uint32_t queues_n,

> > > > > > +	      uint32_t tunnel, uint32_t rss_level)

> > > > >

> > > > > tunnel and rss_level seems to be redundant here.

> > > > >

> > > > > rss_level > 1 is equivalent to tunnel, there is no need to have

> both.

> > > >

> > > > There is a case of tunnel and outer rss(1).

> > >

> > > Why cannot it be handled by a regular Hash Rx queue, i.e. what is

> > > the benefit of creating a tunnel hash Rx queue to make the same job

> > > as a legacy one?

> >

> > Tunnel checksum, ptype and rss offloading demand a QP to be created by

> > DV api with tunnel offload flags.

> 

> I was expecting such answer, such information should be present in the

> function documentation, can you add it?


You mean https://dpdk.org/doc/guides/nics/overview.html?
"Inner L3 checksum" and "Inner L4 checksum" are already defined there.
I added "Inner RSS" per your suggestion. The only thing missing is
"Inner packet type"; does that make sense?

> 

> Thanks,

> 

> --

> Nélio Laranjeiro

> 6WIND
  
Nélio Laranjeiro April 16, 2018, 12:27 p.m. UTC | #7
On Mon, Apr 16, 2018 at 10:06:06AM +0000, Xueming(Steven) Li wrote:
> 
> 
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Monday, April 16, 2018 4:09 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 07/15] net/mlx5: support tunnel RSS level
> > 
> > On Mon, Apr 16, 2018 at 07:46:08AM +0000, Xueming(Steven) Li wrote:
> > >[...]
> > > > > > > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct
> > > > > > > rte_eth_dev
> > > > *dev)
> > > > > > >   *   Number of queues.
> > > > > > >   * @param tunnel
> > > > > > >   *   Tunnel type.
> > > > > > > + * @param rss_level
> > > > > > > + *   RSS hash on tunnel level.
> > > > > > >   *
> > > > > > >   * @return
> > > > > > >   *   The Verbs object initialised, NULL otherwise and rte_errno
> > is
> > > > set.
> > > > > > > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *
> > > > > > > mlx5_hrxq_new(struct rte_eth_dev *dev,
> > > > > > >  	      const uint8_t *rss_key, uint32_t rss_key_len,
> > > > > > >  	      uint64_t hash_fields,
> > > > > > > -	      const uint16_t *queues, uint32_t queues_n, uint32_t
> > > > tunnel)
> > > > > > > +	      const uint16_t *queues, uint32_t queues_n,
> > > > > > > +	      uint32_t tunnel, uint32_t rss_level)
> > > > > >
> > > > > > tunnel and rss_level seems to be redundant here.
> > > > > >
> > > > > > rss_level > 1 is equivalent to tunnel, there is no need to have
> > both.
> > > > >
> > > > > There is a case of tunnel and outer rss(1).
> > > >
> > > > Why cannot it be handled by a regular Hash Rx queue, i.e. what is
> > > > the benefit of creating a tunnel hash Rx queue to make the same job
> > > > as a legacy one?
> > >
> > > Tunnel checksum, ptype and rss offloading demand a QP to be created by
> > > DV api with tunnel offload flags.
> > 
> > I was expecting such answer, such information should be present in the
> > function documentation, can you add it?
> 
> You mean https://dpdk.org/doc/guides/nics/overview.html?
> "Inner L3 checksum" and "Inner L4 checksum" defined. 
> I added "Inner RSS" per your suggestion, The only thing missing is 
> "Innner packet type", make sense?

No, I mean adding to this function's Doxygen documentation the fact that
tunnel is there to get the checksum offload, whereas rss_level is there to
enable RSS on the inner packet.

Thanks,
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index ae118ad33..f9a6c460b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -35,7 +35,7 @@  include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_mlx5.a
 LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
 LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
-LIB_GLUE_VERSION = 18.02.0
+LIB_GLUE_VERSION = 18.05.0
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 64658bc0e..66c7d7993 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -113,6 +113,7 @@  enum hash_rxq_type {
 	HASH_RXQ_UDPV6,
 	HASH_RXQ_IPV6,
 	HASH_RXQ_ETH,
+	HASH_RXQ_TUNNEL,
 };
 
 /* Initialization data for hash RX queue. */
@@ -451,6 +452,7 @@  struct mlx5_flow_parse {
 	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
 	uint8_t rss_key[40]; /**< copy of the RSS key. */
 	enum hash_rxq_type layer; /**< Last pattern layer detected. */
+	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
 	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
 	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
 	struct {
@@ -458,6 +460,7 @@  struct mlx5_flow_parse {
 		/**< Pointer to Verbs attributes. */
 		unsigned int offset;
 		/**< Current position or total size of the attribute. */
+		uint64_t hash_fields; /**< Verbs hash fields. */
 	} queue[RTE_DIM(hash_rxq_init)];
 };
 
@@ -698,7 +701,8 @@  mlx5_flow_convert_actions(struct rte_eth_dev *dev,
 						   " function is Toeplitz");
 				return -rte_errno;
 			}
-			if (rss->level) {
+#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+			if (parser->rss_conf.level > 0) {
 				rte_flow_error_set(error, EINVAL,
 						   RTE_FLOW_ERROR_TYPE_ACTION,
 						   actions,
@@ -706,6 +710,15 @@  mlx5_flow_convert_actions(struct rte_eth_dev *dev,
 						   " level is not supported");
 				return -rte_errno;
 			}
+#endif
+			if (parser->rss_conf.level > 1) {
+				rte_flow_error_set(error, EINVAL,
+						   RTE_FLOW_ERROR_TYPE_ACTION,
+						   actions,
+						   "RSS encapsulation level"
+						   " > 1 is not supported");
+				return -rte_errno;
+			}
 			if (rss->types & MLX5_RSS_HF_MASK) {
 				rte_flow_error_set(error, EINVAL,
 						   RTE_FLOW_ERROR_TYPE_ACTION,
@@ -756,7 +769,7 @@  mlx5_flow_convert_actions(struct rte_eth_dev *dev,
 			}
 			parser->rss_conf = (struct rte_flow_action_rss){
 				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
-				.level = 0,
+				.level = rss->level,
 				.types = rss->types,
 				.key_len = rss_key_len,
 				.queue_num = rss->queue_num,
@@ -842,11 +855,12 @@  mlx5_flow_convert_actions(struct rte_eth_dev *dev,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_convert_items_validate(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
 				 const struct rte_flow_item items[],
 				 struct rte_flow_error *error,
 				 struct mlx5_flow_parse *parser)
 {
+	struct priv *priv = dev->data->dev_private;
 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
 	unsigned int i;
 	int ret = 0;
@@ -886,6 +900,14 @@  mlx5_flow_convert_items_validate(struct rte_eth_dev *dev __rte_unused,
 						   " tunnel encapsulations.");
 				return -rte_errno;
 			}
+			if (!priv->config.tunnel_en &&
+			    parser->rss_conf.level) {
+				rte_flow_error_set(error, ENOTSUP,
+					RTE_FLOW_ERROR_TYPE_ITEM,
+					items,
+					"Tunnel offloading not enabled");
+				return -rte_errno;
+			}
 			parser->inner = IBV_FLOW_SPEC_INNER;
 			parser->tunnel = flow_ptype[items->type];
 		}
@@ -993,7 +1015,11 @@  static void
 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 {
 	unsigned int i;
+	uint32_t inner = parser->inner;
 
+	/* Don't create extra flows for outer RSS. */
+	if (parser->tunnel && !parser->rss_conf.level)
+		return;
 	/* Remove any other flow not matching the pattern. */
 	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
 		for (i = 0; i != hash_rxq_init_n; ++i) {
@@ -1014,23 +1040,25 @@  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 			struct ibv_flow_spec_ipv4_ext ipv4;
 			struct ibv_flow_spec_ipv6 ipv6;
 			struct ibv_flow_spec_tcp_udp udp_tcp;
+			struct ibv_flow_spec_eth eth;
 		} specs;
 		void *dst;
 		uint16_t size;
 
 		if (i == parser->layer)
 			continue;
-		if (parser->layer == HASH_RXQ_ETH) {
+		if (parser->layer == HASH_RXQ_ETH ||
+		    parser->layer == HASH_RXQ_TUNNEL) {
 			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
 				size = sizeof(struct ibv_flow_spec_ipv4_ext);
 				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
-					.type = IBV_FLOW_SPEC_IPV4_EXT,
+					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
 					.size = size,
 				};
 			} else {
 				size = sizeof(struct ibv_flow_spec_ipv6);
 				specs.ipv6 = (struct ibv_flow_spec_ipv6){
-					.type = IBV_FLOW_SPEC_IPV6,
+					.type = inner | IBV_FLOW_SPEC_IPV6,
 					.size = size,
 				};
 			}
@@ -1047,7 +1075,7 @@  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
 			size = sizeof(struct ibv_flow_spec_tcp_udp);
 			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
-				.type = ((i == HASH_RXQ_UDPV4 ||
+				.type = inner | ((i == HASH_RXQ_UDPV4 ||
 					  i == HASH_RXQ_UDPV6) ?
 					 IBV_FLOW_SPEC_UDP :
 					 IBV_FLOW_SPEC_TCP),
@@ -1068,6 +1096,8 @@  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 /**
  * Update flows according to pattern and RSS hash fields.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in, out] parser
  *   Internal parser structure.
  *
@@ -1075,20 +1105,63 @@  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
+mlx5_flow_convert_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser)
 {
-	const unsigned int ipv4 =
+	unsigned int ipv4 =
 		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
 	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
 	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
 	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
 	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
-	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
 	unsigned int i;
+	int found = 0;
 
-	if (parser->layer == HASH_RXQ_ETH)
+	/*
+	 * Outer RSS.
+	 * HASH_RXQ_ETH is the only rule since tunnel packet match this
+	 * rule must match outer pattern.
+	 */
+	if (parser->tunnel && !parser->rss_conf.level) {
+		/* Remove flows other than default. */
+		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+		ipv4 = hash_rxq_init[parser->out_layer].ip_version == MLX5_IPV4;
+		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &
+		    parser->rss_conf.types) {
+			parser->queue[HASH_RXQ_ETH].hash_fields =
+				hash_rxq_init[parser->out_layer].hash_fields;
+		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &
+		    parser->rss_conf.types)) {
+			parser->queue[HASH_RXQ_ETH].hash_fields =
+				hash_rxq_init[ip].hash_fields;
+		} else if (parser->rss_conf.types) {
+			DRV_LOG(WARNING,
+				"port %u rss outer hash function doesn't match"
+				" pattern", dev->data->port_id);
+		}
+		return 0;
+	}
+	if (parser->layer == HASH_RXQ_ETH || parser->layer == HASH_RXQ_TUNNEL) {
+		/* Remove unused flows according to hash function. */
+		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+			if (hash_rxq_init[i].dpdk_rss_hf &
+			    parser->rss_conf.types) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[i].hash_fields;
+				continue;
+			}
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
 		return 0;
-	/* This layer becomes useless as the pattern define under layers. */
+	}
+	/* Remove ETH layer flow. */
 	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
 	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
 	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
@@ -1098,9 +1171,52 @@  mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
 		rte_free(parser->queue[i].ibv_attr);
 		parser->queue[i].ibv_attr = NULL;
 	}
-	/* Remove impossible flow according to the RSS configuration. */
-	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
-	    parser->rss_conf.types) {
+	/*
+	 * Keep L4 flows as IP pattern has to support L4 RSS.
+	 * Otherwise, only keep the flow that match the pattern.
+	 */
+	if (parser->layer != ip) {
+		/* Only keep the flow that match the pattern. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if (i == parser->layer)
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+	}
+	if (parser->rss_conf.types) {
+		/* Remove impossible flow according to the RSS configuration. */
+		for (i = hmin; i != (hmax + 1); ++i) {
+			if (!parser->queue[i].ibv_attr)
+				continue;
+			if (parser->rss_conf.types &
+			    hash_rxq_init[i].dpdk_rss_hf) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[i].hash_fields;
+				found = 1;
+				continue;
+			}
+			/* L4 flow could be used for L3 RSS. */
+			if (i == parser->layer && i < ip &&
+			    (hash_rxq_init[ip].dpdk_rss_hf &
+			     parser->rss_conf.types)) {
+				parser->queue[i].hash_fields =
+					hash_rxq_init[ip].hash_fields;
+				found = 1;
+				continue;
+			}
+			/* L3 flow and L4 hash: non-rss L3 flow. */
+			if (i == parser->layer && i == ip && found)
+				/* IP pattern and L4 HF. */
+				continue;
+			rte_free(parser->queue[i].ibv_attr);
+			parser->queue[i].ibv_attr = NULL;
+		}
+		if (!found)
+			DRV_LOG(WARNING,
+				"port %u rss hash function doesn't match "
+				"pattern", dev->data->port_id);
+	} else {
 		/* Remove any other flow. */
 		for (i = hmin; i != (hmax + 1); ++i) {
 			if (i == parser->layer || !parser->queue[i].ibv_attr)
@@ -1108,8 +1224,6 @@  mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
 			rte_free(parser->queue[i].ibv_attr);
 			parser->queue[i].ibv_attr = NULL;
 		}
-	} else if (!parser->queue[ip].ibv_attr) {
-		/* no RSS possible with the current configuration. */
 		parser->rss_conf.queue_num = 1;
 	}
 	return 0;
@@ -1179,10 +1293,6 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 		for (i = 0; i != hash_rxq_init_n; ++i) {
 			unsigned int offset;
 
-			if (!(parser->rss_conf.types &
-			      hash_rxq_init[i].dpdk_rss_hf) &&
-			    (i != HASH_RXQ_ETH))
-				continue;
 			offset = parser->queue[i].offset;
 			parser->queue[i].ibv_attr =
 				mlx5_flow_convert_allocate(offset, error);
@@ -1194,6 +1304,7 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 	/* Third step. Conversion parse, fill the specifications. */
 	parser->inner = 0;
 	parser->tunnel = 0;
+	parser->layer = HASH_RXQ_ETH;
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 		struct mlx5_flow_data data = {
 			.parser = parser,
@@ -1211,23 +1322,23 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 		if (ret)
 			goto exit_free;
 	}
-	if (parser->mark)
-		mlx5_flow_create_flag_mark(parser, parser->mark_id);
-	if (parser->count && parser->create) {
-		mlx5_flow_create_count(dev, parser);
-		if (!parser->cs)
-			goto exit_count_error;
-	}
 	/*
 	 * Last step. Complete missing specification to reach the RSS
 	 * configuration.
 	 */
 	if (!parser->drop)
-		ret = mlx5_flow_convert_rss(parser);
+		ret = mlx5_flow_convert_rss(dev, parser);
 		if (ret)
 			goto exit_free;
 		mlx5_flow_convert_finalise(parser);
 	mlx5_flow_update_priority(dev, parser, attr);
+	if (parser->mark)
+		mlx5_flow_create_flag_mark(parser, parser->mark_id);
+	if (parser->count && parser->create) {
+		mlx5_flow_create_count(dev, parser);
+		if (!parser->cs)
+			goto exit_count_error;
+	}
 exit_free:
 	/* Only verification is expected, all resources should be released. */
 	if (!parser->create) {
@@ -1275,17 +1386,11 @@  mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
 	for (i = 0; i != hash_rxq_init_n; ++i) {
 		if (!parser->queue[i].ibv_attr)
 			continue;
-		/* Specification must be the same l3 type or none. */
-		if (parser->layer == HASH_RXQ_ETH ||
-		    (hash_rxq_init[parser->layer].ip_version ==
-		     hash_rxq_init[i].ip_version) ||
-		    (hash_rxq_init[i].ip_version == 0)) {
-			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
-					parser->queue[i].offset);
-			memcpy(dst, src, size);
-			++parser->queue[i].ibv_attr->num_of_specs;
-			parser->queue[i].offset += size;
-		}
+		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+				parser->queue[i].offset);
+		memcpy(dst, src, size);
+		++parser->queue[i].ibv_attr->num_of_specs;
+		parser->queue[i].offset += size;
 	}
 }
 
@@ -1316,9 +1421,7 @@  mlx5_flow_create_eth(const struct rte_flow_item *item,
 		.size = eth_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_ETH;
+	parser->layer = HASH_RXQ_ETH;
 	if (spec) {
 		unsigned int i;
 
@@ -1431,9 +1534,7 @@  mlx5_flow_create_ipv4(const struct rte_flow_item *item,
 		.size = ipv4_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_IPV4;
+	parser->layer = HASH_RXQ_IPV4;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1486,9 +1587,7 @@  mlx5_flow_create_ipv6(const struct rte_flow_item *item,
 		.size = ipv6_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner)
-		parser->layer = HASH_RXQ_IPV6;
+	parser->layer = HASH_RXQ_IPV6;
 	if (spec) {
 		unsigned int i;
 		uint32_t vtc_flow_val;
@@ -1561,13 +1660,10 @@  mlx5_flow_create_udp(const struct rte_flow_item *item,
 		.size = udp_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner) {
-		if (parser->layer == HASH_RXQ_IPV4)
-			parser->layer = HASH_RXQ_UDPV4;
-		else
-			parser->layer = HASH_RXQ_UDPV6;
-	}
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_UDPV4;
+	else
+		parser->layer = HASH_RXQ_UDPV6;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1610,13 +1706,10 @@  mlx5_flow_create_tcp(const struct rte_flow_item *item,
 		.size = tcp_size,
 	};
 
-	/* Don't update layer for the inner pattern. */
-	if (!parser->inner) {
-		if (parser->layer == HASH_RXQ_IPV4)
-			parser->layer = HASH_RXQ_TCPV4;
-		else
-			parser->layer = HASH_RXQ_TCPV6;
-	}
+	if (parser->layer == HASH_RXQ_IPV4)
+		parser->layer = HASH_RXQ_TCPV4;
+	else
+		parser->layer = HASH_RXQ_TCPV6;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1666,6 +1759,8 @@  mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 	id.vni[0] = 0;
 	parser->inner = IBV_FLOW_SPEC_INNER;
 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
+	parser->out_layer = parser->layer;
+	parser->layer = HASH_RXQ_TUNNEL;
 	if (spec) {
 		if (!mask)
 			mask = default_mask;
@@ -1720,6 +1815,8 @@  mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
 
 	parser->inner = IBV_FLOW_SPEC_INNER;
 	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
+	parser->out_layer = parser->layer;
+	parser->layer = HASH_RXQ_TUNNEL;
 	mlx5_flow_create_copy(parser, &tunnel, size);
 	return 0;
 }
@@ -1883,33 +1980,33 @@  mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
 	unsigned int i;
 
 	for (i = 0; i != hash_rxq_init_n; ++i) {
-		uint64_t hash_fields;
-
 		if (!parser->queue[i].ibv_attr)
 			continue;
 		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
 		parser->queue[i].ibv_attr = NULL;
-		hash_fields = hash_rxq_init[i].hash_fields;
+		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
 		if (!priv->dev->data->dev_started)
 			continue;
 		flow->frxq[i].hrxq =
 			mlx5_hrxq_get(dev,
 				      parser->rss_conf.key,
 				      parser->rss_conf.key_len,
-				      hash_fields,
+				      flow->frxq[i].hash_fields,
 				      parser->rss_conf.queue,
 				      parser->rss_conf.queue_num,
-				      parser->tunnel);
+				      parser->tunnel,
+				      parser->rss_conf.level);
 		if (flow->frxq[i].hrxq)
 			continue;
 		flow->frxq[i].hrxq =
 			mlx5_hrxq_new(dev,
 				      parser->rss_conf.key,
 				      parser->rss_conf.key_len,
-				      hash_fields,
+				      flow->frxq[i].hash_fields,
 				      parser->rss_conf.queue,
 				      parser->rss_conf.queue_num,
-				      parser->tunnel);
+				      parser->tunnel,
+				      parser->rss_conf.level);
 		if (!flow->frxq[i].hrxq) {
 			return rte_flow_error_set(error, ENOMEM,
 						  RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -2006,7 +2103,7 @@  mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
 		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
 			dev->data->port_id,
 			(void *)flow, i,
-			(void *)flow->frxq[i].hrxq,
+			(void *)flow->frxq[i].hrxq->qp,
 			(void *)flow->frxq[i].ibv_flow);
 	}
 	if (!flows_n) {
@@ -2532,19 +2629,21 @@  mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 			flow->frxq[i].hrxq =
 				mlx5_hrxq_get(dev, flow->rss_conf.key,
 					      flow->rss_conf.key_len,
-					      hash_rxq_init[i].hash_fields,
+					      flow->frxq[i].hash_fields,
 					      flow->rss_conf.queue,
 					      flow->rss_conf.queue_num,
-					      flow->tunnel);
+					      flow->tunnel,
+					      flow->rss_conf.level);
 			if (flow->frxq[i].hrxq)
 				goto flow_create;
 			flow->frxq[i].hrxq =
 				mlx5_hrxq_new(dev, flow->rss_conf.key,
 					      flow->rss_conf.key_len,
-					      hash_rxq_init[i].hash_fields,
+					      flow->frxq[i].hash_fields,
 					      flow->rss_conf.queue,
 					      flow->rss_conf.queue_num,
-					      flow->tunnel);
+					      flow->tunnel,
+					      flow->rss_conf.level);
 			if (!flow->frxq[i].hrxq) {
 				DRV_LOG(DEBUG,
 					"port %u flow %p cannot be applied",
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index be684d378..6874aa32a 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -313,6 +313,21 @@  mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type)
 	return mlx5dv_init_obj(obj, obj_type);
 }
 
+static struct ibv_qp *
+mlx5_glue_dv_create_qp(struct ibv_context *context,
+		       struct ibv_qp_init_attr_ex *qp_init_attr_ex,
+		       struct mlx5dv_qp_init_attr *dv_qp_init_attr)
+{
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	return mlx5dv_create_qp(context, qp_init_attr_ex, dv_qp_init_attr);
+#else
+	(void)context;
+	(void)qp_init_attr_ex;
+	(void)dv_qp_init_attr;
+	return NULL;
+#endif
+}
+
 const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
 	.version = MLX5_GLUE_VERSION,
 	.fork_init = mlx5_glue_fork_init,
@@ -356,4 +371,5 @@  const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
 	.dv_query_device = mlx5_glue_dv_query_device,
 	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
 	.dv_init_obj = mlx5_glue_dv_init_obj,
+	.dv_create_qp = mlx5_glue_dv_create_qp,
 };
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index b5efee3b6..841363872 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -31,6 +31,10 @@  struct ibv_counter_set_init_attr;
 struct ibv_query_counter_set_attr;
 #endif
 
+#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+struct mlx5dv_qp_init_attr;
+#endif
+
 /* LIB_GLUE_VERSION must be updated every time this structure is modified. */
 struct mlx5_glue {
 	const char *version;
@@ -106,6 +110,10 @@  struct mlx5_glue {
 				   enum mlx5dv_set_ctx_attr_type type,
 				   void *attr);
 	int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
+	struct ibv_qp *(*dv_create_qp)
+		(struct ibv_context *context,
+		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
+		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
 };
 
 const struct mlx5_glue *mlx5_glue;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 073732e16..6e5565fb2 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1386,6 +1386,8 @@  mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
  *   Number of queues.
  * @param tunnel
  *   Tunnel type.
+ * @param rss_level
+ *   RSS hash on tunnel level.
  *
  * @return
  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
@@ -1394,13 +1396,17 @@  struct mlx5_hrxq *
 mlx5_hrxq_new(struct rte_eth_dev *dev,
 	      const uint8_t *rss_key, uint32_t rss_key_len,
 	      uint64_t hash_fields,
-	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
+	      const uint16_t *queues, uint32_t queues_n,
+	      uint32_t tunnel, uint32_t rss_level)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_hrxq *hrxq;
 	struct mlx5_ind_table_ibv *ind_tbl;
 	struct ibv_qp *qp;
 	int err;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	struct mlx5dv_qp_init_attr qp_init_attr = {0};
+#endif
 
 	queues_n = hash_fields ? queues_n : 1;
 	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
@@ -1410,6 +1416,33 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
 		rte_errno = ENOMEM;
 		return NULL;
 	}
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+	if (tunnel) {
+		qp_init_attr.comp_mask =
+				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
+		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
+	}
+	qp = mlx5_glue->dv_create_qp(
+		priv->ctx,
+		&(struct ibv_qp_init_attr_ex){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_QP_INIT_ATTR_PD |
+				IBV_QP_INIT_ATTR_IND_TABLE |
+				IBV_QP_INIT_ATTR_RX_HASH,
+			.rx_hash_conf = (struct ibv_rx_hash_conf){
+				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_key_len,
+				.rx_hash_key = (void *)(uintptr_t)rss_key,
+				.rx_hash_fields_mask = hash_fields |
+					(tunnel && rss_level ?
+					(uint32_t)IBV_RX_HASH_INNER : 0),
+			},
+			.rwq_ind_tbl = ind_tbl->ind_table,
+			.pd = priv->pd,
+		},
+		&qp_init_attr);
+#else
 	qp = mlx5_glue->create_qp_ex
 		(priv->ctx,
 		 &(struct ibv_qp_init_attr_ex){
@@ -1427,6 +1460,7 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
 			.rwq_ind_tbl = ind_tbl->ind_table,
 			.pd = priv->pd,
 		 });
+#endif
 	if (!qp) {
 		rte_errno = errno;
 		goto error;
@@ -1439,6 +1473,7 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
 	hrxq->rss_key_len = rss_key_len;
 	hrxq->hash_fields = hash_fields;
 	hrxq->tunnel = tunnel;
+	hrxq->rss_level = rss_level;
 	memcpy(hrxq->rss_key, rss_key, rss_key_len);
 	rte_atomic32_inc(&hrxq->refcnt);
 	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
@@ -1448,6 +1483,8 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
 	return hrxq;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
+	DRV_LOG(ERR, "port %u: Error creating Hash Rx queue",
+		dev->data->port_id);
 	mlx5_ind_table_ibv_release(dev, ind_tbl);
 	if (qp)
 		claim_zero(mlx5_glue->destroy_qp(qp));
@@ -1469,6 +1506,8 @@  mlx5_hrxq_new(struct rte_eth_dev *dev,
  *   Number of queues.
  * @param tunnel
  *   Tunnel type.
+ * @param rss_level
+ *   RSS hash on tunnel level.
  *
  * @return
  *   An hash Rx queue on success.
@@ -1477,7 +1516,8 @@  struct mlx5_hrxq *
 mlx5_hrxq_get(struct rte_eth_dev *dev,
 	      const uint8_t *rss_key, uint32_t rss_key_len,
 	      uint64_t hash_fields,
-	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
+	      const uint16_t *queues, uint32_t queues_n,
+	      uint32_t tunnel, uint32_t rss_level)
 {
 	struct priv *priv = dev->data->dev_private;
 	struct mlx5_hrxq *hrxq;
@@ -1494,6 +1534,8 @@  mlx5_hrxq_get(struct rte_eth_dev *dev,
 			continue;
 		if (hrxq->tunnel != tunnel)
 			continue;
+		if (hrxq->rss_level != rss_level)
+			continue;
 		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
 		if (!ind_tbl)
 			continue;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d35605b55..62cf55109 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -147,6 +147,7 @@  struct mlx5_hrxq {
 	struct ibv_qp *qp; /* Verbs queue pair. */
 	uint64_t hash_fields; /* Verbs Hash fields. */
 	uint32_t tunnel; /* Tunnel type. */
+	uint32_t rss_level; /* RSS on tunnel level. */
 	uint32_t rss_key_len; /* Hash key length in bytes. */
 	uint8_t rss_key[]; /* Hash key. */
 };
@@ -251,12 +252,12 @@  struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
 				const uint8_t *rss_key, uint32_t rss_key_len,
 				uint64_t hash_fields,
 				const uint16_t *queues, uint32_t queues_n,
-				uint32_t tunnel);
+				uint32_t tunnel, uint32_t rss_level);
 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
 				const uint8_t *rss_key, uint32_t rss_key_len,
 				uint64_t hash_fields,
 				const uint16_t *queues, uint32_t queues_n,
-				uint32_t tunnel);
+				uint32_t tunnel, uint32_t rss_level);
 int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
 int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
 uint64_t mlx5_get_rx_port_offloads(void);