[dpdk-dev,v2,01/15] net/mlx5: support 16 hardware priorities

Message ID 20180410133415.189905-2-xuemingl@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Xueming Li April 10, 2018, 1:34 p.m. UTC
  Adjust flow priority mapping to adapt to the new hardware support for
16 Verbs flow priorities:
0-3: RTE FLOW tunnel rule
4-7: RTE FLOW non-tunnel rule
8-15: PMD control flow

Signed-off-by: Xueming Li <xuemingl@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         |  10 ++++
 drivers/net/mlx5/mlx5.h         |   8 +++
 drivers/net/mlx5/mlx5_flow.c    | 107 ++++++++++++++++++++++++++++++----------
 drivers/net/mlx5/mlx5_trigger.c |   8 ---
 4 files changed, 100 insertions(+), 33 deletions(-)
  

Comments

Nélio Laranjeiro April 10, 2018, 2:41 p.m. UTC | #1
On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> Adjust flow priority mapping to adapt new hardware 16 verb flow
> priorites support:
> 0-3: RTE FLOW tunnel rule
> 4-7: RTE FLOW non-tunnel rule
> 8-15: PMD control flow

This commit log is misleading: this number of priorities depends on the
Mellanox OFED installed; it is not yet available in the upstream Linux
kernel, nor in the current Mellanox OFED GA.

What happens when this number of priorities is not available? Does it
remove functionality?  Will it collide with other flows?

> Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         |  10 ++++
>  drivers/net/mlx5/mlx5.h         |   8 +++
>  drivers/net/mlx5/mlx5_flow.c    | 107 ++++++++++++++++++++++++++++++----------
>  drivers/net/mlx5/mlx5_trigger.c |   8 ---
>  4 files changed, 100 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index cfab55897..a1f2799e5 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
>  		priv->txqs_n = 0;
>  		priv->txqs = NULL;
>  	}
> +	mlx5_flow_delete_drop_queue(dev);
>
>  	if (priv->pd != NULL) {
>  		assert(priv->ctx != NULL);
>  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> @@ -993,6 +994,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		mlx5_set_link_up(eth_dev);
>  		/* Store device configuration on private structure. */
>  		priv->config = config;
> +		/* Create drop queue. */
> +		err = mlx5_flow_create_drop_queue(eth_dev);
> +		if (err) {
> +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> +				eth_dev->data->port_id, strerror(rte_errno));
> +			goto port_error;
> +		}
> +		/* Supported flow priority number detection. */
> +		mlx5_flow_priorities_detect(eth_dev);
>  		continue;
>  port_error:
>  		if (priv)
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 63b24e6bb..708272f6d 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -89,6 +89,8 @@ struct mlx5_dev_config {
>  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
>  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> +	unsigned int flow_priority_shift; /* Non-tunnel flow priority shift. */
> +	unsigned int control_flow_priority; /* Control flow priority. */
>  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
>  	unsigned int ind_table_max_size; /* Maximum indirection table size. */
>  	int txq_inline; /* Maximum packet size for inlining. */
> @@ -105,6 +107,11 @@ enum mlx5_verbs_alloc_type {
>  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
>  };
>  
> +/* 8 Verbs priorities per flow. */
> +#define MLX5_VERBS_FLOW_PRIO_8 8
> +/* 4 Verbs priorities per flow. */
> +#define MLX5_VERBS_FLOW_PRIO_4 4
> +
>  /**
>   * Verbs allocator needs a context to know in the callback which kind of
>   * resources it is allocating.
> @@ -253,6 +260,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);
>  
>  /* mlx5_flow.c */
>  
> +void mlx5_flow_priorities_detect(struct rte_eth_dev *dev);
>  int mlx5_flow_validate(struct rte_eth_dev *dev,
>  		       const struct rte_flow_attr *attr,
>  		       const struct rte_flow_item items[],
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 288610620..394760418 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -32,9 +32,6 @@
>  #include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  
> -/* Define minimal priority for control plane flows. */
> -#define MLX5_CTRL_FLOW_PRIORITY 4
> -
>  /* Internet Protocol versions. */
>  #define MLX5_IPV4 4
>  #define MLX5_IPV6 6
> @@ -129,7 +126,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_UDPV4] = {
> @@ -138,7 +135,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_IPV4] = {
> @@ -146,7 +143,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV4),
>  		.dpdk_rss_hf = (ETH_RSS_IPV4 |
>  				ETH_RSS_FRAG_IPV4),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV4,
>  	},
>  	[HASH_RXQ_TCPV6] = {
> @@ -155,7 +152,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_TCP |
>  				IBV_RX_HASH_DST_PORT_TCP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_UDPV6] = {
> @@ -164,7 +161,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_SRC_PORT_UDP |
>  				IBV_RX_HASH_DST_PORT_UDP),
>  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
> -		.flow_priority = 1,
> +		.flow_priority = 0,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_IPV6] = {
> @@ -172,13 +169,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
>  				IBV_RX_HASH_DST_IPV6),
>  		.dpdk_rss_hf = (ETH_RSS_IPV6 |
>  				ETH_RSS_FRAG_IPV6),
> -		.flow_priority = 2,
> +		.flow_priority = 1,
>  		.ip_version = MLX5_IPV6,
>  	},
>  	[HASH_RXQ_ETH] = {
>  		.hash_fields = 0,
>  		.dpdk_rss_hf = 0,
> -		.flow_priority = 3,
> +		.flow_priority = 2,
>  	},
>  };

If the number of priorities remains 8, you are removing the extra priority
for tunnel flows introduced by
commit 749365717f5c ("net/mlx5: change tunnel flow priority")

Please keep this functionality when this patch fails to get the expected
16 Verbs priorities.

> @@ -536,6 +533,8 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
>  /**
>   * Extract attribute to the parser.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in] attr
>   *   Flow rule attributes.
>   * @param[out] error
> @@ -545,9 +544,12 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
> +mlx5_flow_convert_attributes(struct rte_eth_dev *dev,
> +			     const struct rte_flow_attr *attr,
>  			     struct rte_flow_error *error)
>  {
> +	struct priv *priv = dev->data->dev_private;
> +
>  	if (attr->group) {
>  		rte_flow_error_set(error, ENOTSUP,
>  				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> @@ -555,7 +557,7 @@ mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
>  				   "groups are not supported");
>  		return -rte_errno;
>  	}
> -	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
> +	if (attr->priority > priv->config.control_flow_priority) {
>  		rte_flow_error_set(error, ENOTSUP,
>  				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
>  				   NULL,
> @@ -900,30 +902,38 @@ mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
>   * Make inner packet matching with an higher priority from the non Inner
>   * matching.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in, out] parser
>   *   Internal parser structure.
>   * @param attr
>   *   User flow attribute.
>   */
>  static void
> -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> +			  struct mlx5_flow_parse *parser,
>  			  const struct rte_flow_attr *attr)
>  {
> +	struct priv *priv = dev->data->dev_private;
>  	unsigned int i;
> +	uint16_t priority;
>  
> +	if (priv->config.flow_priority_shift == 1)
> +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> +	else
> +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> +	if (!parser->inner)
> +		priority += priv->config.flow_priority_shift;
>  	if (parser->drop) {
> -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> -			attr->priority +
> -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
>  		return;
>  	}
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
> -		if (parser->queue[i].ibv_attr) {
> -			parser->queue[i].ibv_attr->priority =
> -				attr->priority +
> -				hash_rxq_init[i].flow_priority -
> -				(parser->inner ? 1 : 0);
> -		}
> +		if (!parser->queue[i].ibv_attr)
> +			continue;
> +		parser->queue[i].ibv_attr->priority = priority +
> +				hash_rxq_init[i].flow_priority;
>  	}
>  }
>  
> @@ -1087,7 +1097,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		.layer = HASH_RXQ_ETH,
>  		.mark_id = MLX5_FLOW_MARK_DEFAULT,
>  	};
> -	ret = mlx5_flow_convert_attributes(attr, error);
> +	ret = mlx5_flow_convert_attributes(dev, attr, error);
>  	if (ret)
>  		return ret;
>  	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
> @@ -1158,7 +1168,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	 */
>  	if (!parser->drop)
>  		mlx5_flow_convert_finalise(parser);
> -	mlx5_flow_update_priority(parser, attr);
> +	mlx5_flow_update_priority(dev, parser, attr);
>  exit_free:
>  	/* Only verification is expected, all resources should be released. */
>  	if (!parser->create) {
> @@ -2450,7 +2460,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
>  	struct priv *priv = dev->data->dev_private;
>  	const struct rte_flow_attr attr = {
>  		.ingress = 1,
> -		.priority = MLX5_CTRL_FLOW_PRIORITY,
> +		.priority = priv->config.control_flow_priority,
>  	};
>  	struct rte_flow_item items[] = {
>  		{
> @@ -3161,3 +3171,50 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
>  	}
>  	return 0;
>  }
> +
> +/**
> + * Detect number of Verbs flow priorities supported.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + */
> +void
> +mlx5_flow_priorities_detect(struct rte_eth_dev *dev)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
> +	struct {
> +		struct ibv_flow_attr attr;
> +		struct ibv_flow_spec_eth eth;
> +		struct ibv_flow_spec_action_drop drop;
> +	} flow_attr = {
> +		.attr = {
> +			.num_of_specs = 2,
> +			.priority = verb_priorities - 1,
> +		},
> +		.eth = {
> +			.type = IBV_FLOW_SPEC_ETH,
> +			.size = sizeof(struct ibv_flow_spec_eth),
> +		},
> +		.drop = {
> +			.size = sizeof(struct ibv_flow_spec_action_drop),
> +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> +		},
> +	};
> +	struct ibv_flow *flow;
> +
> +	if (priv->config.control_flow_priority)
> +		return;
> +	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> +				      &flow_attr.attr);
> +	if (flow) {
> +		priv->config.flow_priority_shift = MLX5_VERBS_FLOW_PRIO_8 / 2;
> +		claim_zero(mlx5_glue->destroy_flow(flow));
> +	} else {
> +		priv->config.flow_priority_shift = 1;
> +		verb_priorities = verb_priorities / 2;
> +	}
> +	priv->config.control_flow_priority = 1;
> +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> +		dev->data->port_id, verb_priorities);
> +}
> diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
> index 6bb4ffb14..d80a2e688 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	int ret;
>  
>  	dev->data->dev_started = 1;
> -	ret = mlx5_flow_create_drop_queue(dev);
> -	if (ret) {
> -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> -			dev->data->port_id, strerror(rte_errno));
> -		goto error;
> -	}
>  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
>  		dev->data->port_id);
>  	rte_mempool_walk(mlx5_mp2mr_iter, priv);
> @@ -202,7 +196,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
>  	mlx5_traffic_disable(dev);
>  	mlx5_txq_stop(dev);
>  	mlx5_rxq_stop(dev);
> -	mlx5_flow_delete_drop_queue(dev);
>  	rte_errno = ret; /* Restore rte_errno. */
>  	return -rte_errno;
>  }
> @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
>  	mlx5_rxq_stop(dev);
>  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
>  		mlx5_mr_release(mr);
> -	mlx5_flow_delete_drop_queue(dev);
>  }
>  
>  /**
> -- 
> 2.13.3

I have a few concerns on this: mlx5_pci_probe() will also probe any
underlying Verbs device and, in the near future, the representors
associated with a VF.
Such a detection should only be done once, by the PF, and I also wonder
whether it is possible to create such a drop action on a representor
directly using Verbs.

Another concern: this patch will eventually be reverted once those 16
priorities are always available.  It will be easier to remove a single
detection function than to search for all of these modifications.

I would suggest a standalone mlx5_flow_priorities_detect() which creates
and deletes all the resources needed for this detection.
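
A rough sketch of such a standalone detection, reusing the drop-queue
helpers already present in this patch (illustrative only, not a final
implementation):

static void
mlx5_flow_priorities_detect(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (priv->config.control_flow_priority)
		return;
	/* Create the probe resources locally instead of relying on start. */
	if (mlx5_flow_create_drop_queue(dev))
		return;	/* Keep the default 8-priority behaviour. */
	/* ...probe with mlx5_glue->create_flow() as done in this patch... */
	/* ...then release everything before returning. */
	mlx5_flow_delete_drop_queue(dev);
}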

Thanks,
  
Xueming Li April 10, 2018, 3:22 p.m. UTC | #2
Hi Nelio,

> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Tuesday, April 10, 2018 10:42 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
> 
> On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> > Adjust flow priority mapping to adapt new hardware 16 verb flow
> > priorites support:
> > 0-3: RTE FLOW tunnel rule
> > 4-7: RTE FLOW non-tunnel rule
> > 8-15: PMD control flow
> 
> This commit log is inducing people in error, this amount of priority
> depends on the Mellanox OFED installed, it is not available on upstream
> Linux kernel yet nor in the current Mellanox OFED GA.
> 
> What happens when those amount of priority are not available, is it
> removing a functionality?  Will it collide with other flows?

If 16 priorities are not available, the PMD simply behaves as with 8 priorities.
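
For illustration, the resulting Verbs priority layout would roughly be the
following (a sketch derived from this patch, not normative):

/*
 * 16 Verbs priorities available       8 Verbs priorities (fallback)
 *   0-3   RTE flow tunnel rules         0-3  RTE flow rules
 *   4-7   RTE flow non-tunnel rules          (tunnel rules get lower values)
 *   8-15  PMD control flows             4-7  PMD control flows
 */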

> 
> > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5.c         |  10 ++++
> >  drivers/net/mlx5/mlx5.h         |   8 +++
> >  drivers/net/mlx5/mlx5_flow.c    | 107 ++++++++++++++++++++++++++++++---
> -------
> >  drivers/net/mlx5/mlx5_trigger.c |   8 ---
> >  4 files changed, 100 insertions(+), 33 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > cfab55897..a1f2799e5 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -197,6 +197,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
> >  		priv->txqs_n = 0;
> >  		priv->txqs = NULL;
> >  	}
> > +	mlx5_flow_delete_drop_queue(dev);
> >
> >  	if (priv->pd != NULL) {
> >  		assert(priv->ctx != NULL);
> >  		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> > @@ -993,6 +994,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
> >  		mlx5_set_link_up(eth_dev);
> >  		/* Store device configuration on private structure. */
> >  		priv->config = config;
> > +		/* Create drop queue. */
> > +		err = mlx5_flow_create_drop_queue(eth_dev);
> > +		if (err) {
> > +			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > +				eth_dev->data->port_id, strerror(rte_errno));
> > +			goto port_error;
> > +		}
> > +		/* Supported flow priority number detection. */
> > +		mlx5_flow_priorities_detect(eth_dev);
> >  		continue;
> >  port_error:
> >  		if (priv)
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > 63b24e6bb..708272f6d 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -89,6 +89,8 @@ struct mlx5_dev_config {
> >  	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
> >  	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
> >  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
> > +	unsigned int flow_priority_shift; /* Non-tunnel flow priority shift.
> */
> > +	unsigned int control_flow_priority; /* Control flow priority. */
> >  	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
> >  	unsigned int ind_table_max_size; /* Maximum indirection table size.
> */
> >  	int txq_inline; /* Maximum packet size for inlining. */ @@ -105,6
> > +107,11 @@ enum mlx5_verbs_alloc_type {
> >  	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
> >  };
> >
> > +/* 8 Verbs priorities per flow. */
> > +#define MLX5_VERBS_FLOW_PRIO_8 8
> > +/* 4 Verbs priorities per flow. */
> > +#define MLX5_VERBS_FLOW_PRIO_4 4
> > +
> >  /**
> >   * Verbs allocator needs a context to know in the callback which kind
> of
> >   * resources it is allocating.
> > @@ -253,6 +260,7 @@ int mlx5_traffic_restart(struct rte_eth_dev *dev);
> >
> >  /* mlx5_flow.c */
> >
> > +void mlx5_flow_priorities_detect(struct rte_eth_dev *dev);
> >  int mlx5_flow_validate(struct rte_eth_dev *dev,
> >  		       const struct rte_flow_attr *attr,
> >  		       const struct rte_flow_item items[], diff --git
> > a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> > 288610620..394760418 100644
> > --- a/drivers/net/mlx5/mlx5_flow.c
> > +++ b/drivers/net/mlx5/mlx5_flow.c
> > @@ -32,9 +32,6 @@
> >  #include "mlx5_prm.h"
> >  #include "mlx5_glue.h"
> >
> > -/* Define minimal priority for control plane flows. */ -#define
> > MLX5_CTRL_FLOW_PRIORITY 4
> > -
> >  /* Internet Protocol versions. */
> >  #define MLX5_IPV4 4
> >  #define MLX5_IPV6 6
> > @@ -129,7 +126,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_SRC_PORT_TCP |
> >  				IBV_RX_HASH_DST_PORT_TCP),
> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
> > -		.flow_priority = 1,
> > +		.flow_priority = 0,
> >  		.ip_version = MLX5_IPV4,
> >  	},
> >  	[HASH_RXQ_UDPV4] = {
> > @@ -138,7 +135,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_SRC_PORT_UDP |
> >  				IBV_RX_HASH_DST_PORT_UDP),
> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
> > -		.flow_priority = 1,
> > +		.flow_priority = 0,
> >  		.ip_version = MLX5_IPV4,
> >  	},
> >  	[HASH_RXQ_IPV4] = {
> > @@ -146,7 +143,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_DST_IPV4),
> >  		.dpdk_rss_hf = (ETH_RSS_IPV4 |
> >  				ETH_RSS_FRAG_IPV4),
> > -		.flow_priority = 2,
> > +		.flow_priority = 1,
> >  		.ip_version = MLX5_IPV4,
> >  	},
> >  	[HASH_RXQ_TCPV6] = {
> > @@ -155,7 +152,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_SRC_PORT_TCP |
> >  				IBV_RX_HASH_DST_PORT_TCP),
> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
> > -		.flow_priority = 1,
> > +		.flow_priority = 0,
> >  		.ip_version = MLX5_IPV6,
> >  	},
> >  	[HASH_RXQ_UDPV6] = {
> > @@ -164,7 +161,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_SRC_PORT_UDP |
> >  				IBV_RX_HASH_DST_PORT_UDP),
> >  		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
> > -		.flow_priority = 1,
> > +		.flow_priority = 0,
> >  		.ip_version = MLX5_IPV6,
> >  	},
> >  	[HASH_RXQ_IPV6] = {
> > @@ -172,13 +169,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
> >  				IBV_RX_HASH_DST_IPV6),
> >  		.dpdk_rss_hf = (ETH_RSS_IPV6 |
> >  				ETH_RSS_FRAG_IPV6),
> > -		.flow_priority = 2,
> > +		.flow_priority = 1,
> >  		.ip_version = MLX5_IPV6,
> >  	},
> >  	[HASH_RXQ_ETH] = {
> >  		.hash_fields = 0,
> >  		.dpdk_rss_hf = 0,
> > -		.flow_priority = 3,
> > +		.flow_priority = 2,
> >  	},
> >  };
> 
> If the amount of priorities remains 8, you are removing the priority for
> the tunnel flows introduced by commit 749365717f5c ("net/mlx5: change
> tunnel flow priority")
> 
> Please keep this functionality when this patch fails to get the expected
> 16 Verbs priorities.

These priority shifts are different in the 16-priority scenario, so I
changed it to a calculation.  In mlx5_flow_priorities_detect(), the
priority shift will be 1 with 8 priorities and 4 with 16 priorities.
Please refer to the changes in mlx5_flow_update_priority() as well.
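
Spelled out, the computation in mlx5_flow_update_priority() becomes roughly
(a sketch of the logic, not additional patch code):

	/* shift = priv->config.flow_priority_shift (1 with 8 Verbs
	 * priorities, 4 with 16), as set by mlx5_flow_priorities_detect().
	 */
	uint16_t group = (shift == 1) ? MLX5_VERBS_FLOW_PRIO_4 :
					MLX5_VERBS_FLOW_PRIO_8;
	uint16_t priority = attr->priority * group;

	if (!parser->inner)
		priority += shift;	/* tunnel rules keep the lower values */
	priority += hash_rxq_init[i].flow_priority;	/* 0, 1 or 2 */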

> 
> > @@ -536,6 +533,8 @@ mlx5_flow_item_validate(const struct rte_flow_item
> > *item,
> >  /**
> >   * Extract attribute to the parser.
> >   *
> > + * @param dev
> > + *   Pointer to Ethernet device.
> >   * @param[in] attr
> >   *   Flow rule attributes.
> >   * @param[out] error
> > @@ -545,9 +544,12 @@ mlx5_flow_item_validate(const struct rte_flow_item
> *item,
> >   *   0 on success, a negative errno value otherwise and rte_errno is
> set.
> >   */
> >  static int
> > -mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
> > +mlx5_flow_convert_attributes(struct rte_eth_dev *dev,
> > +			     const struct rte_flow_attr *attr,
> >  			     struct rte_flow_error *error)  {
> > +	struct priv *priv = dev->data->dev_private;
> > +
> >  	if (attr->group) {
> >  		rte_flow_error_set(error, ENOTSUP,
> >  				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP, @@ -555,7
> +557,7 @@
> > mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
> >  				   "groups are not supported");
> >  		return -rte_errno;
> >  	}
> > -	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
> > +	if (attr->priority > priv->config.control_flow_priority) {
> >  		rte_flow_error_set(error, ENOTSUP,
> >  				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> >  				   NULL,
> > @@ -900,30 +902,38 @@ mlx5_flow_convert_allocate(unsigned int size,
> struct rte_flow_error *error)
> >   * Make inner packet matching with an higher priority from the non
> Inner
> >   * matching.
> >   *
> > + * @param dev
> > + *   Pointer to Ethernet device.
> >   * @param[in, out] parser
> >   *   Internal parser structure.
> >   * @param attr
> >   *   User flow attribute.
> >   */
> >  static void
> > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > +			  struct mlx5_flow_parse *parser,
> >  			  const struct rte_flow_attr *attr)  {
> > +	struct priv *priv = dev->data->dev_private;
> >  	unsigned int i;
> > +	uint16_t priority;
> >
> > +	if (priv->config.flow_priority_shift == 1)
> > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> > +	else
> > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > +	if (!parser->inner)
> > +		priority += priv->config.flow_priority_shift;
> >  	if (parser->drop) {
> > -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> > -			attr->priority +
> > -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> > +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> >  		return;
> >  	}
> >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> > -		if (parser->queue[i].ibv_attr) {
> > -			parser->queue[i].ibv_attr->priority =
> > -				attr->priority +
> > -				hash_rxq_init[i].flow_priority -
> > -				(parser->inner ? 1 : 0);
> > -		}
> > +		if (!parser->queue[i].ibv_attr)
> > +			continue;
> > +		parser->queue[i].ibv_attr->priority = priority +
> > +				hash_rxq_init[i].flow_priority;
> >  	}
> >  }
> >
> > @@ -1087,7 +1097,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> >  		.layer = HASH_RXQ_ETH,
> >  		.mark_id = MLX5_FLOW_MARK_DEFAULT,
> >  	};
> > -	ret = mlx5_flow_convert_attributes(attr, error);
> > +	ret = mlx5_flow_convert_attributes(dev, attr, error);
> >  	if (ret)
> >  		return ret;
> >  	ret = mlx5_flow_convert_actions(dev, actions, error, parser); @@
> > -1158,7 +1168,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> >  	 */
> >  	if (!parser->drop)
> >  		mlx5_flow_convert_finalise(parser);
> > -	mlx5_flow_update_priority(parser, attr);
> > +	mlx5_flow_update_priority(dev, parser, attr);
> >  exit_free:
> >  	/* Only verification is expected, all resources should be released.
> */
> >  	if (!parser->create) {
> > @@ -2450,7 +2460,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
> >  	struct priv *priv = dev->data->dev_private;
> >  	const struct rte_flow_attr attr = {
> >  		.ingress = 1,
> > -		.priority = MLX5_CTRL_FLOW_PRIORITY,
> > +		.priority = priv->config.control_flow_priority,
> >  	};
> >  	struct rte_flow_item items[] = {
> >  		{
> > @@ -3161,3 +3171,50 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
> >  	}
> >  	return 0;
> >  }
> > +
> > +/**
> > + * Detect number of Verbs flow priorities supported.
> > + *
> > + * @param dev
> > + *   Pointer to Ethernet device.
> > + */
> > +void
> > +mlx5_flow_priorities_detect(struct rte_eth_dev *dev) {
> > +	struct priv *priv = dev->data->dev_private;
> > +	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
> > +	struct {
> > +		struct ibv_flow_attr attr;
> > +		struct ibv_flow_spec_eth eth;
> > +		struct ibv_flow_spec_action_drop drop;
> > +	} flow_attr = {
> > +		.attr = {
> > +			.num_of_specs = 2,
> > +			.priority = verb_priorities - 1,
> > +		},
> > +		.eth = {
> > +			.type = IBV_FLOW_SPEC_ETH,
> > +			.size = sizeof(struct ibv_flow_spec_eth),
> > +		},
> > +		.drop = {
> > +			.size = sizeof(struct ibv_flow_spec_action_drop),
> > +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> > +		},
> > +	};
> > +	struct ibv_flow *flow;
> > +
> > +	if (priv->config.control_flow_priority)
> > +		return;
> > +	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> > +				      &flow_attr.attr);
> > +	if (flow) {
> > +		priv->config.flow_priority_shift = MLX5_VERBS_FLOW_PRIO_8 / 2;
> > +		claim_zero(mlx5_glue->destroy_flow(flow));
> > +	} else {
> > +		priv->config.flow_priority_shift = 1;
> > +		verb_priorities = verb_priorities / 2;
> > +	}
> > +	priv->config.control_flow_priority = 1;
> > +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> > +		dev->data->port_id, verb_priorities); }
> > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688 100644
> > --- a/drivers/net/mlx5/mlx5_trigger.c
> > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> >  	int ret;
> >
> >  	dev->data->dev_started = 1;
> > -	ret = mlx5_flow_create_drop_queue(dev);
> > -	if (ret) {
> > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > -			dev->data->port_id, strerror(rte_errno));
> > -		goto error;
> > -	}
> >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
> >  		dev->data->port_id);
> >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > mlx5_dev_start(struct rte_eth_dev *dev)
> >  	mlx5_traffic_disable(dev);
> >  	mlx5_txq_stop(dev);
> >  	mlx5_rxq_stop(dev);
> > -	mlx5_flow_delete_drop_queue(dev);
> >  	rte_errno = ret; /* Restore rte_errno. */
> >  	return -rte_errno;
> >  }
> > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> >  	mlx5_rxq_stop(dev);
> >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
> >  		mlx5_mr_release(mr);
> > -	mlx5_flow_delete_drop_queue(dev);
> >  }
> >
> >  /**
> > --
> > 2.13.3
> 
> I have few concerns on this, mlx5_pci_probe() will also probe any under
> layer verbs device, and in a near future the representors associated to a
> VF.
> Making such detection should only be done once by the PF, I also wander if
> it is possible to make such drop action in a representor directly using
> Verbs.

Then there should be some work to disable flows in representors? That is
supposed to cover this.

> 
> Another concern is, this patch will be reverted in some time when those
> 16 priority will be always available.  It will be easier to remove this
> detection function than searching for all those modifications.
> 
> I would suggest to have a standalone mlx5_flow_priorities_detect() which
> creates and deletes all resources needed for this detection.

There is an upcoming feature to support more than 16 priorities, so the
auto-detection will be kept, IMHO.  Besides, a standalone function would
have to duplicate a bundle of resource creation and removal; I'm not sure
that is worthwhile, please refer to mlx5_flow_create_drop_queue().

> 
> Thanks,
> 
> --
> Nélio Laranjeiro
> 6WIND
  
Nélio Laranjeiro April 12, 2018, 9:09 a.m. UTC | #3
On Tue, Apr 10, 2018 at 03:22:46PM +0000, Xueming(Steven) Li wrote:
> Hi Nelio,
> 
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Tuesday, April 10, 2018 10:42 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
> > 
> > On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> > > Adjust flow priority mapping to adapt new hardware 16 verb flow
> > > priorites support:
> > > 0-3: RTE FLOW tunnel rule
> > > 4-7: RTE FLOW non-tunnel rule
> > > 8-15: PMD control flow
> > 
> > This commit log is inducing people in error, this amount of priority
> > depends on the Mellanox OFED installed, it is not available on upstream
> > Linux kernel yet nor in the current Mellanox OFED GA.
> > 
> > What happens when those amount of priority are not available, is it
> > removing a functionality?  Will it collide with other flows?
> 
> If 16  priorities not available, simply behavior as 8 priorities.

It is not described in the commit log, please add it.

> > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
<snip/>
> > >  	},
> > >  	[HASH_RXQ_ETH] = {
> > >  		.hash_fields = 0,
> > >  		.dpdk_rss_hf = 0,
> > > -		.flow_priority = 3,
> > > +		.flow_priority = 2,
> > >  	},
> > >  };
> > 
> > If the amount of priorities remains 8, you are removing the priority for
> > the tunnel flows introduced by commit 749365717f5c ("net/mlx5: change
> > tunnel flow priority")
> > 
> > Please keep this functionality when this patch fails to get the expected
> > 16 Verbs priorities.
> 
> These priority shift are different in 16 priorities scenario, I changed it
> to calculation. In function mlx5_flow_priorities_detect(), priority shift 
> will be 1 if 8 priorities, 4 in case of 16 priorities. Please refer to changes
> in function mlx5_flow_update_priority() as well.

Please light my lamp, I don't see it...
 
<snip/>
> > >  static void
> > > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > > +			  struct mlx5_flow_parse *parser,
> > >  			  const struct rte_flow_attr *attr)  {
> > > +	struct priv *priv = dev->data->dev_private;
> > >  	unsigned int i;
> > > +	uint16_t priority;
> > >
> > > +	if (priv->config.flow_priority_shift == 1)
> > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> > > +	else
> > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > > +	if (!parser->inner)
> > > +		priority += priv->config.flow_priority_shift;
> > >  	if (parser->drop) {
> > > -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> > > -			attr->priority +
> > > -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > > +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
> > > +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > >  		return;
> > >  	}
> > >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> > > -		if (parser->queue[i].ibv_attr) {
> > > -			parser->queue[i].ibv_attr->priority =
> > > -				attr->priority +
> > > -				hash_rxq_init[i].flow_priority -
> > > -				(parser->inner ? 1 : 0);
> > > -		}
> > > +		if (!parser->queue[i].ibv_attr)
> > > +			continue;
> > > +		parser->queue[i].ibv_attr->priority = priority +
> > > +				hash_rxq_init[i].flow_priority;

The previous code was subtracting one from the table priorities, which
started at 1.  In the new code I don't see it.

What am I missing?

> > >  	}
> > >  }
> > >
> > > @@ -1087,7 +1097,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > >  		.layer = HASH_RXQ_ETH,
> > >  		.mark_id = MLX5_FLOW_MARK_DEFAULT,
> > >  	};
> > > -	ret = mlx5_flow_convert_attributes(attr, error);
> > > +	ret = mlx5_flow_convert_attributes(dev, attr, error);
> > >  	if (ret)
> > >  		return ret;
> > >  	ret = mlx5_flow_convert_actions(dev, actions, error, parser); @@
> > > -1158,7 +1168,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > >  	 */
> > >  	if (!parser->drop)
> > >  		mlx5_flow_convert_finalise(parser);
> > > -	mlx5_flow_update_priority(parser, attr);
> > > +	mlx5_flow_update_priority(dev, parser, attr);
> > >  exit_free:
> > >  	/* Only verification is expected, all resources should be released.
> > */
> > >  	if (!parser->create) {
> > > @@ -2450,7 +2460,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
> > >  	struct priv *priv = dev->data->dev_private;
> > >  	const struct rte_flow_attr attr = {
> > >  		.ingress = 1,
> > > -		.priority = MLX5_CTRL_FLOW_PRIORITY,
> > > +		.priority = priv->config.control_flow_priority,
> > >  	};
> > >  	struct rte_flow_item items[] = {
> > >  		{
> > > @@ -3161,3 +3171,50 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
> > >  	}
> > >  	return 0;
> > >  }
> > > +
> > > +/**
> > > + * Detect number of Verbs flow priorities supported.
> > > + *
> > > + * @param dev
> > > + *   Pointer to Ethernet device.
> > > + */
> > > +void
> > > +mlx5_flow_priorities_detect(struct rte_eth_dev *dev) {
> > > +	struct priv *priv = dev->data->dev_private;
> > > +	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
> > > +	struct {
> > > +		struct ibv_flow_attr attr;
> > > +		struct ibv_flow_spec_eth eth;
> > > +		struct ibv_flow_spec_action_drop drop;
> > > +	} flow_attr = {
> > > +		.attr = {
> > > +			.num_of_specs = 2,
> > > +			.priority = verb_priorities - 1,
> > > +		},
> > > +		.eth = {
> > > +			.type = IBV_FLOW_SPEC_ETH,
> > > +			.size = sizeof(struct ibv_flow_spec_eth),
> > > +		},
> > > +		.drop = {
> > > +			.size = sizeof(struct ibv_flow_spec_action_drop),
> > > +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> > > +		},
> > > +	};
> > > +	struct ibv_flow *flow;
> > > +
> > > +	if (priv->config.control_flow_priority)
> > > +		return;
> > > +	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> > > +				      &flow_attr.attr);
> > > +	if (flow) {
> > > +		priv->config.flow_priority_shift = MLX5_VERBS_FLOW_PRIO_8 / 2;
> > > +		claim_zero(mlx5_glue->destroy_flow(flow));
> > > +	} else {
> > > +		priv->config.flow_priority_shift = 1;
> > > +		verb_priorities = verb_priorities / 2;
> > > +	}
> > > +	priv->config.control_flow_priority = 1;
> > > +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> > > +		dev->data->port_id, verb_priorities); }
> > > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688 100644
> > > --- a/drivers/net/mlx5/mlx5_trigger.c
> > > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> > >  	int ret;
> > >
> > >  	dev->data->dev_started = 1;
> > > -	ret = mlx5_flow_create_drop_queue(dev);
> > > -	if (ret) {
> > > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > -			dev->data->port_id, strerror(rte_errno));
> > > -		goto error;
> > > -	}
> > >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
> > >  		dev->data->port_id);
> > >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > > mlx5_dev_start(struct rte_eth_dev *dev)
> > >  	mlx5_traffic_disable(dev);
> > >  	mlx5_txq_stop(dev);
> > >  	mlx5_rxq_stop(dev);
> > > -	mlx5_flow_delete_drop_queue(dev);
> > >  	rte_errno = ret; /* Restore rte_errno. */
> > >  	return -rte_errno;
> > >  }
> > > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> > >  	mlx5_rxq_stop(dev);
> > >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
> > >  		mlx5_mr_release(mr);
> > > -	mlx5_flow_delete_drop_queue(dev);
> > >  }
> > >
> > >  /**
> > > --
> > > 2.13.3
> > 
> > I have few concerns on this, mlx5_pci_probe() will also probe any under
> > layer verbs device, and in a near future the representors associated to a
> > VF.
> > Making such detection should only be done once by the PF, I also wander if
> > it is possible to make such drop action in a representor directly using
> > Verbs.
> 
> Then there should be some work to disable flows in representors? that 
> supposed to cover this.

The code raising another Verbs device has been present since the first
introduction of this PMD in the DPDK tree; you must respect the code
already there.
This request is not directly related to a new feature but to an existing
one, the representors being just an example.

This detection should only be done once, not for each of them.

> > Another concern is, this patch will be reverted in some time when those
> > 16 priority will be always available.  It will be easier to remove this
> > detection function than searching for all those modifications.
> > 
> > I would suggest to have a standalone mlx5_flow_priorities_detect() which
> > creates and deletes all resources needed for this detection.
> 
> There is an upcoming new feature to support priorities more than 16, auto 
> detection will be kept IMHO.

Only until the final priority values are backported to all the kernels we
support.  You are not looking far enough into the future.

> Besides, there will be a bundle of resource creation and removal in
> this standalone function, I'm not sure it valuable to duplicate them,
> please refer to function mlx5_flow_create_drop_queue().

You misunderstood: I am not asking you to avoid the default drop queues,
but rather, instead of building rte_flow attributes, items and actions, to
build the Verbs specification directly on the stack.  It will be faster
than doing a series of conversions (relying on malloc) from rte_flow to
Verbs when you know exactly what is needed, i.e. a single spec.

Thanks,
  
Xueming Li April 12, 2018, 1:43 p.m. UTC | #4
> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Thursday, April 12, 2018 5:09 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
> 
> On Tue, Apr 10, 2018 at 03:22:46PM +0000, Xueming(Steven) Li wrote:
> > Hi Nelio,
> >
> > > -----Original Message-----
> > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > Sent: Tuesday, April 10, 2018 10:42 PM
> > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware
> > > priorities
> > >
> > > On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> > > > Adjust flow priority mapping to adapt new hardware 16 verb flow
> > > > priorites support:
> > > > 0-3: RTE FLOW tunnel rule
> > > > 4-7: RTE FLOW non-tunnel rule
> > > > 8-15: PMD control flow
> > >
> > > This commit log is inducing people in error, this amount of priority
> > > depends on the Mellanox OFED installed, it is not available on
> > > upstream Linux kernel yet nor in the current Mellanox OFED GA.
> > >
> > > What happens when those amount of priority are not available, is it
> > > removing a functionality?  Will it collide with other flows?
> >
> > If 16  priorities not available, simply behavior as 8 priorities.
> 
> It is not described in the commit log, please add it.
> 
> > > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> <snip/>
> > > >  	},
> > > >  	[HASH_RXQ_ETH] = {
> > > >  		.hash_fields = 0,
> > > >  		.dpdk_rss_hf = 0,
> > > > -		.flow_priority = 3,
> > > > +		.flow_priority = 2,
> > > >  	},
> > > >  };
> > >
> > > If the amount of priorities remains 8, you are removing the priority
> > > for the tunnel flows introduced by commit 749365717f5c ("net/mlx5:
> > > change tunnel flow priority")
> > >
> > > Please keep this functionality when this patch fails to get the
> > > expected
> > > 16 Verbs priorities.
> >
> > These priority shift are different in 16 priorities scenario, I
> > changed it to calculation. In function mlx5_flow_priorities_detect(),
> > priority shift will be 1 if 8 priorities, 4 in case of 16 priorities.
> > Please refer to changes in function mlx5_flow_update_priority() as well.
> 
> Please light my lamp, I don't see it...

Sorry, please refer to priv->config.flow_priority_shift.

> 
> <snip/>
> > > >  static void
> > > > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > > > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > > > +			  struct mlx5_flow_parse *parser,
> > > >  			  const struct rte_flow_attr *attr)  {
> > > > +	struct priv *priv = dev->data->dev_private;
> > > >  	unsigned int i;
> > > > +	uint16_t priority;
> > > >
> > > > +	if (priv->config.flow_priority_shift == 1)
> > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> > > > +	else
> > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > > > +	if (!parser->inner)
> > > > +		priority += priv->config.flow_priority_shift;

Here, for a non-tunnel flow, the priority is lowered (the value is
increased) by 1 with 8 priorities, and by 4 otherwise.  I'll append a
comment here.

> > > >  	if (parser->drop) {
> > > > -		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> > > > -			attr->priority +
> > > > -			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > > > +		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
> priority +
> > > > +				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
> > > >  		return;
> > > >  	}
> > > >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> > > > -		if (parser->queue[i].ibv_attr) {
> > > > -			parser->queue[i].ibv_attr->priority =
> > > > -				attr->priority +
> > > > -				hash_rxq_init[i].flow_priority -
> > > > -				(parser->inner ? 1 : 0);
> > > > -		}
> > > > +		if (!parser->queue[i].ibv_attr)
> > > > +			continue;
> > > > +		parser->queue[i].ibv_attr->priority = priority +
> > > > +				hash_rxq_init[i].flow_priority;
> 
> Previous code was subtracting one from the table priorities which was
> starting at 1.  In the new code I don't see it.
> 
> What I am missing?

Please refer to the new comment above, around the "priority" variable
calculation.

> 
> > > >  	}
> > > >  }
> > > >
> > > > @@ -1087,7 +1097,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > > >  		.layer = HASH_RXQ_ETH,
> > > >  		.mark_id = MLX5_FLOW_MARK_DEFAULT,
> > > >  	};
> > > > -	ret = mlx5_flow_convert_attributes(attr, error);
> > > > +	ret = mlx5_flow_convert_attributes(dev, attr, error);
> > > >  	if (ret)
> > > >  		return ret;
> > > >  	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
> @@
> > > > -1158,7 +1168,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> > > >  	 */
> > > >  	if (!parser->drop)
> > > >  		mlx5_flow_convert_finalise(parser);
> > > > -	mlx5_flow_update_priority(parser, attr);
> > > > +	mlx5_flow_update_priority(dev, parser, attr);
> > > >  exit_free:
> > > >  	/* Only verification is expected, all resources should be
> released.
> > > */
> > > >  	if (!parser->create) {
> > > > @@ -2450,7 +2460,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
> > > >  	struct priv *priv = dev->data->dev_private;
> > > >  	const struct rte_flow_attr attr = {
> > > >  		.ingress = 1,
> > > > -		.priority = MLX5_CTRL_FLOW_PRIORITY,
> > > > +		.priority = priv->config.control_flow_priority,
> > > >  	};
> > > >  	struct rte_flow_item items[] = {
> > > >  		{
> > > > @@ -3161,3 +3171,50 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
> > > >  	}
> > > >  	return 0;
> > > >  }
> > > > +
> > > > +/**
> > > > + * Detect number of Verbs flow priorities supported.
> > > > + *
> > > > + * @param dev
> > > > + *   Pointer to Ethernet device.
> > > > + */
> > > > +void
> > > > +mlx5_flow_priorities_detect(struct rte_eth_dev *dev) {
> > > > +	struct priv *priv = dev->data->dev_private;
> > > > +	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
> > > > +	struct {
> > > > +		struct ibv_flow_attr attr;
> > > > +		struct ibv_flow_spec_eth eth;
> > > > +		struct ibv_flow_spec_action_drop drop;
> > > > +	} flow_attr = {
> > > > +		.attr = {
> > > > +			.num_of_specs = 2,
> > > > +			.priority = verb_priorities - 1,
> > > > +		},
> > > > +		.eth = {
> > > > +			.type = IBV_FLOW_SPEC_ETH,
> > > > +			.size = sizeof(struct ibv_flow_spec_eth),
> > > > +		},
> > > > +		.drop = {
> > > > +			.size = sizeof(struct ibv_flow_spec_action_drop),
> > > > +			.type = IBV_FLOW_SPEC_ACTION_DROP,
> > > > +		},
> > > > +	};
> > > > +	struct ibv_flow *flow;
> > > > +
> > > > +	if (priv->config.control_flow_priority)
> > > > +		return;
> > > > +	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
> > > > +				      &flow_attr.attr);
> > > > +	if (flow) {
> > > > +		priv->config.flow_priority_shift =
> MLX5_VERBS_FLOW_PRIO_8 / 2;
> > > > +		claim_zero(mlx5_glue->destroy_flow(flow));
> > > > +	} else {
> > > > +		priv->config.flow_priority_shift = 1;
> > > > +		verb_priorities = verb_priorities / 2;
> > > > +	}
> > > > +	priv->config.control_flow_priority = 1;
> > > > +	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
> > > > +		dev->data->port_id, verb_priorities); }
> > > > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > > > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688
> > > > 100644
> > > > --- a/drivers/net/mlx5/mlx5_trigger.c
> > > > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > > > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> > > >  	int ret;
> > > >
> > > >  	dev->data->dev_started = 1;
> > > > -	ret = mlx5_flow_create_drop_queue(dev);
> > > > -	if (ret) {
> > > > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > > -			dev->data->port_id, strerror(rte_errno));
> > > > -		goto error;
> > > > -	}
> > > >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx
> queues",
> > > >  		dev->data->port_id);
> > > >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > > > mlx5_dev_start(struct rte_eth_dev *dev)
> > > >  	mlx5_traffic_disable(dev);
> > > >  	mlx5_txq_stop(dev);
> > > >  	mlx5_rxq_stop(dev);
> > > > -	mlx5_flow_delete_drop_queue(dev);
> > > >  	rte_errno = ret; /* Restore rte_errno. */
> > > >  	return -rte_errno;
> > > >  }
> > > > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> > > >  	mlx5_rxq_stop(dev);
> > > >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv-
> >mr))
> > > >  		mlx5_mr_release(mr);
> > > > -	mlx5_flow_delete_drop_queue(dev);
> > > >  }
> > > >
> > > >  /**
> > > > --
> > > > 2.13.3
> > >
> > > I have few concerns on this, mlx5_pci_probe() will also probe any
> > > under layer verbs device, and in a near future the representors
> > > associated to a VF.
> > > Making such detection should only be done once by the PF, I also
> > > wander if it is possible to make such drop action in a representor
> > > directly using Verbs.
> >
> > Then there should be some work to disable flows in representors? that
> > supposed to cover this.
> 
> The code raising another Verbs device is already present and since the
> first entrance of this PMD in the DPDK tree, you must respect the code
> already present.
> This request is not related directly to a new feature but to an existing
> one, the representors being just an example.
> 
> This detection should be only done once and not for each of them.

Could you please elaborate on "underlying Verbs device" and how to
determine the dependency on the PF?  Is there a probe order between them?

BTW, the VF representor code seems to exist in 17.11, not upstream.

> 
> > > Another concern is, this patch will be reverted in some time when
> > > those
> > > 16 priority will be always available.  It will be easier to remove
> > > this detection function than searching for all those modifications.
> > >
> > > I would suggest to have a standalone mlx5_flow_priorities_detect()
> > > which creates and deletes all resources needed for this detection.
> >
> > There is an upcoming new feature to support priorities more than 16,
> > auto detection will be kept IMHO.
> 
> Until the final values of priorities will be backported to all kernels we
> support.  You don't see far enough in the future.
> 
> > Besides, there will be a bundle of resource creation and removal in
> > this standalone function, I'm not sure it valuable to duplicate them,
> > please refer to function mlx5_flow_create_drop_queue().
> 
> You misunderstood, I am not asking you to not use the default drop queues
> but instead of making an rte_flow attributes, items and actions to make
> directly the Verbs specification on the stack.  It will be faster than
> making a bunch of conversions (relying on malloc) from rte_flow to Verbs
> whereas you know exactly what it needs i.e. 1 spec.

Sorry, still confused: mlx5_flow_priorities_detect() invokes
ibv_destroy_flow(), not the rte_flow machinery, and there is no malloc at
all.  BTW, the mlx5 flow API bypasses Verbs flows in offline mode, so we
can't use it to create flows at that stage.

> 
> Thanks,
> 
> --
> Nélio Laranjeiro
> 6WIND
  
Nélio Laranjeiro April 12, 2018, 2:02 p.m. UTC | #5
On Thu, Apr 12, 2018 at 01:43:04PM +0000, Xueming(Steven) Li wrote:
> 
> 
> > -----Original Message-----
> > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > Sent: Thursday, April 12, 2018 5:09 PM
> > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
> > 
> > On Tue, Apr 10, 2018 at 03:22:46PM +0000, Xueming(Steven) Li wrote:
> > > Hi Nelio,
> > >
> > > > -----Original Message-----
> > > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > > Sent: Tuesday, April 10, 2018 10:42 PM
> > > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware
> > > > priorities
> > > >
> > > > On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> > > > > Adjust flow priority mapping to adapt new hardware 16 verb flow
> > > > > priorites support:
> > > > > 0-3: RTE FLOW tunnel rule
> > > > > 4-7: RTE FLOW non-tunnel rule
> > > > > 8-15: PMD control flow
> > > >
> > > > This commit log is inducing people in error, this amount of priority
> > > > depends on the Mellanox OFED installed, it is not available on
> > > > upstream Linux kernel yet nor in the current Mellanox OFED GA.
> > > >
> > > > What happens when those amount of priority are not available, is it
> > > > removing a functionality?  Will it collide with other flows?
> > >
> > > If 16  priorities not available, simply behavior as 8 priorities.
> > 
> > It is not described in the commit log, please add it.
> > 
> > > > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > <snip/>
> > > > >  	},
> > > > >  	[HASH_RXQ_ETH] = {
> > > > >  		.hash_fields = 0,
> > > > >  		.dpdk_rss_hf = 0,
> > > > > -		.flow_priority = 3,
> > > > > +		.flow_priority = 2,
> > > > >  	},
> > > > >  };
> > > >
> > > > If the amount of priorities remains 8, you are removing the priority
> > > > for the tunnel flows introduced by commit 749365717f5c ("net/mlx5:
> > > > change tunnel flow priority")
> > > >
> > > > Please keep this functionality when this patch fails to get the
> > > > expected
> > > > 16 Verbs priorities.
> > >
> > > These priority shift are different in 16 priorities scenario, I
> > > changed it to calculation. In function mlx5_flow_priorities_detect(),
> > > priority shift will be 1 if 8 priorities, 4 in case of 16 priorities.
> > > Please refer to changes in function mlx5_flow_update_priority() as well.
> > 
> > Please light my lamp, I don't see it...
> 
> Sorry, please refer to priv->config.flow_priority_shift.
> 
> > 
> > <snip/>
> > > > >  static void
> > > > > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > > > > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > > > > +			  struct mlx5_flow_parse *parser,
> > > > >  			  const struct rte_flow_attr *attr)  {
> > > > > +	struct priv *priv = dev->data->dev_private;
> > > > >  	unsigned int i;
> > > > > +	uint16_t priority;
> > > > >
> > > > > +	if (priv->config.flow_priority_shift == 1)
> > > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> > > > > +	else
> > > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > > > > +	if (!parser->inner)
> > > > > +		priority += priv->config.flow_priority_shift;
> 
> Here, if non-tunnel flow, lower(increase) 1 for 8 priorities, lower 4 otherwise.
> I'll append a comment here.

Thanks, I totally missed this one.

<snip/>
> > > > > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > > > > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688
> > > > > 100644
> > > > > --- a/drivers/net/mlx5/mlx5_trigger.c
> > > > > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > > > > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> > > > >  	int ret;
> > > > >
> > > > >  	dev->data->dev_started = 1;
> > > > > -	ret = mlx5_flow_create_drop_queue(dev);
> > > > > -	if (ret) {
> > > > > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > > > -			dev->data->port_id, strerror(rte_errno));
> > > > > -		goto error;
> > > > > -	}
> > > > >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx
> > queues",
> > > > >  		dev->data->port_id);
> > > > >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > > > > mlx5_dev_start(struct rte_eth_dev *dev)
> > > > >  	mlx5_traffic_disable(dev);
> > > > >  	mlx5_txq_stop(dev);
> > > > >  	mlx5_rxq_stop(dev);
> > > > > -	mlx5_flow_delete_drop_queue(dev);
> > > > >  	rte_errno = ret; /* Restore rte_errno. */
> > > > >  	return -rte_errno;
> > > > >  }
> > > > > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> > > > >  	mlx5_rxq_stop(dev);
> > > > >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv-
> > >mr))
> > > > >  		mlx5_mr_release(mr);
> > > > > -	mlx5_flow_delete_drop_queue(dev);
> > > > >  }
> > > > >
> > > > >  /**
> > > > > --
> > > > > 2.13.3
> > > >
> > > > I have few concerns on this, mlx5_pci_probe() will also probe any
> > > > under layer verbs device, and in a near future the representors
> > > > associated to a VF.
> > > > Making such detection should only be done once by the PF, I also
> > > > wander if it is possible to make such drop action in a representor
> > > > directly using Verbs.
> > >
> > > Then there should be some work to disable flows in representors? that
> > > supposed to cover this.
> > 
> > The code raising another Verbs device is already present and since the
> > first entrance of this PMD in the DPDK tree, you must respect the code
> > already present.
> > This request is not related directly to a new feature but to an existing
> > one, the representors being just an example.
> > 
> > This detection should be only done once and not for each of them.
> 
> Could you please elaborate on "under layer verbs device" and how to judge
> dependency to PF, is there a probe order between them?

The place where you are adding mlx5_flow_create_drop_queue() in
mlx5_pci_probe() probes any device returned by the Verbs
ibv_get_device_list().

This code is also present in mlx4, where a single PCI id exposes 2
physical ports.

If the NIC handles several ibvdev (Verbs devices), it will create a new
rte_eth_dev for each one.

> BTW, VF representor code seems exists in 17.11, not upstream.
> 
> > 
> > > > Another concern is, this patch will be reverted in some time when
> > > > those
> > > > 16 priority will be always available.  It will be easier to remove
> > > > this detection function than searching for all those modifications.
> > > >
> > > > I would suggest to have a standalone mlx5_flow_priorities_detect()
> > > > which creates and deletes all resources needed for this detection.
> > >
> > > There is an upcoming new feature to support priorities more than 16,
> > > auto detection will be kept IMHO.
> > 
> > Until the final values of priorities will be backported to all kernels we
> > support.  You don't see far enough in the future.
> > 
> > > Besides, there will be a bundle of resource creation and removal in
> > > this standalone function, I'm not sure it valuable to duplicate them,
> > > please refer to function mlx5_flow_create_drop_queue().
> > 
> > You misunderstood, I am not asking you to not use the default drop queues
> > but instead of making an rte_flow attributes, items and actions to make
> > directly the Verbs specification on the stack.  It will be faster than
> > making a bunch of conversions (relying on malloc) from rte_flow to Verbs
> > whereas you know exactly what it needs i.e. 1 spec.
> 
> Sorry, still confused: mlx5_flow_priorities_detect() invokes ibv_destroy_flow(),
> not the rte_flow code, and there is no malloc at all. BTW, the mlx5 flow API
> bypasses Verbs flow creation in offline mode, so we can't use it to create
> flows at that stage.

Sorry I was the one confused. Priority detection is Ok.
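
For reference, the detection indeed builds the whole Verbs specification on
the stack; a condensed illustration (plain Verbs calls instead of the PMD's
glue layer, hypothetical wrapper name) looks like this:

/* Try to create a drop flow at the given priority on the drop queue QP and
 * destroy it immediately; returns 1 if the priority is supported. */
static int
example_probe_priority(struct ibv_qp *drop_qp, uint32_t prio)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} spec = {
		.attr = {
			.num_of_specs = 2,
			.priority = prio,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		},
	};
	struct ibv_flow *flow = ibv_create_flow(drop_qp, &spec.attr);

	if (flow == NULL)
		return 0;
	ibv_destroy_flow(flow);
	return 1;
}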

After reading this, I suggest using a boolean in mlx5_pci_probe() so that this
detection is only done once; the underlying Verbs devices will inherit that
knowledge and adjust their own shift accordingly.

What do you think?
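
(For illustration, a rough sketch of this suggestion, with hypothetical
"example_" names and assuming the mlx5_flow_priorities_detect() declaration
from this patch, could be:)

static void
example_probe_ports(struct rte_eth_dev **eth_devs, unsigned int n_ports)
{
	int detected = 0; /* the boolean mentioned above */
	unsigned int i;

	for (i = 0; i != n_ports; ++i) {
		if (!detected) {
			mlx5_flow_priorities_detect(eth_devs[i]);
			detected = 1;
		}
		/* the other ports would reuse the shift stored by the first
		 * detection instead of probing again */
	}
}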
  
Xueming Li April 12, 2018, 2:46 p.m. UTC | #6
> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> Sent: Thursday, April 12, 2018 10:03 PM
> To: Xueming(Steven) Li <xuemingl@mellanox.com>
> Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware priorities
> 
> On Thu, Apr 12, 2018 at 01:43:04PM +0000, Xueming(Steven) Li wrote:
> >
> >
> > > -----Original Message-----
> > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > Sent: Thursday, April 12, 2018 5:09 PM
> > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware
> > > priorities
> > >
> > > On Tue, Apr 10, 2018 at 03:22:46PM +0000, Xueming(Steven) Li wrote:
> > > > Hi Nelio,
> > > >
> > > > > -----Original Message-----
> > > > > From: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
> > > > > Sent: Tuesday, April 10, 2018 10:42 PM
> > > > > To: Xueming(Steven) Li <xuemingl@mellanox.com>
> > > > > Cc: Shahaf Shuler <shahafs@mellanox.com>; dev@dpdk.org
> > > > > Subject: Re: [PATCH v2 01/15] net/mlx5: support 16 hardware
> > > > > priorities
> > > > >
> > > > > On Tue, Apr 10, 2018 at 09:34:01PM +0800, Xueming Li wrote:
> > > > > > Adjust flow priority mapping to adapt new hardware 16 verb
> > > > > > flow priorites support:
> > > > > > 0-3: RTE FLOW tunnel rule
> > > > > > 4-7: RTE FLOW non-tunnel rule
> > > > > > 8-15: PMD control flow
> > > > >
> > > > > This commit log is inducing people in error, this amount of
> > > > > priority depends on the Mellanox OFED installed, it is not
> > > > > available on upstream Linux kernel yet nor in the current Mellanox
> OFED GA.
> > > > >
> > > > > What happens when those amount of priority are not available, is
> > > > > it removing a functionality?  Will it collide with other flows?
> > > >
> > > > If 16  priorities not available, simply behavior as 8 priorities.
> > >
> > > It is not described in the commit log, please add it.
> > >
> > > > > > Signed-off-by: Xueming Li <xuemingl@mellanox.com>
> > > <snip/>
> > > > > >  	},
> > > > > >  	[HASH_RXQ_ETH] = {
> > > > > >  		.hash_fields = 0,
> > > > > >  		.dpdk_rss_hf = 0,
> > > > > > -		.flow_priority = 3,
> > > > > > +		.flow_priority = 2,
> > > > > >  	},
> > > > > >  };
> > > > >
> > > > > If the amount of priorities remains 8, you are removing the
> > > > > priority for the tunnel flows introduced by commit 749365717f5c
> ("net/mlx5:
> > > > > change tunnel flow priority")
> > > > >
> > > > > Please keep this functionality when this patch fails to get the
> > > > > expected
> > > > > 16 Verbs priorities.
> > > >
> > > > These priority shift are different in 16 priorities scenario, I
> > > > changed it to calculation. In function
> > > > mlx5_flow_priorities_detect(), priority shift will be 1 if 8
> priorities, 4 in case of 16 priorities.
> > > > Please refer to changes in function mlx5_flow_update_priority() as
> well.
> > >
> > > Please light my lamp, I don't see it...
> >
> > Sorry, please refer to priv->config.flow_priority_shift.
> >
> > >
> > > <snip/>
> > > > > >  static void
> > > > > > -mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
> > > > > > +mlx5_flow_update_priority(struct rte_eth_dev *dev,
> > > > > > +			  struct mlx5_flow_parse *parser,
> > > > > >  			  const struct rte_flow_attr *attr)  {
> > > > > > +	struct priv *priv = dev->data->dev_private;
> > > > > >  	unsigned int i;
> > > > > > +	uint16_t priority;
> > > > > >
> > > > > > +	if (priv->config.flow_priority_shift == 1)
> > > > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
> > > > > > +	else
> > > > > > +		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
> > > > > > +	if (!parser->inner)
> > > > > > +		priority += priv->config.flow_priority_shift;
> >
> > Here, if non-tunnel flow, lower(increase) 1 for 8 priorities, lower 4
> otherwise.
> > I'll append a comment here.
> 
> Thanks, I totally missed this one.
> 
> <snip/>
> > > > > > diff --git a/drivers/net/mlx5/mlx5_trigger.c
> > > > > > b/drivers/net/mlx5/mlx5_trigger.c index 6bb4ffb14..d80a2e688
> > > > > > 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_trigger.c
> > > > > > +++ b/drivers/net/mlx5/mlx5_trigger.c
> > > > > > @@ -148,12 +148,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
> > > > > >  	int ret;
> > > > > >
> > > > > >  	dev->data->dev_started = 1;
> > > > > > -	ret = mlx5_flow_create_drop_queue(dev);
> > > > > > -	if (ret) {
> > > > > > -		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
> > > > > > -			dev->data->port_id, strerror(rte_errno));
> > > > > > -		goto error;
> > > > > > -	}
> > > > > >  	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx
> > > queues",
> > > > > >  		dev->data->port_id);
> > > > > >  	rte_mempool_walk(mlx5_mp2mr_iter, priv); @@ -202,7 +196,6 @@
> > > > > > mlx5_dev_start(struct rte_eth_dev *dev)
> > > > > >  	mlx5_traffic_disable(dev);
> > > > > >  	mlx5_txq_stop(dev);
> > > > > >  	mlx5_rxq_stop(dev);
> > > > > > -	mlx5_flow_delete_drop_queue(dev);
> > > > > >  	rte_errno = ret; /* Restore rte_errno. */
> > > > > >  	return -rte_errno;
> > > > > >  }
> > > > > > @@ -237,7 +230,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
> > > > > >  	mlx5_rxq_stop(dev);
> > > > > >  	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv-
> > > >mr))
> > > > > >  		mlx5_mr_release(mr);
> > > > > > -	mlx5_flow_delete_drop_queue(dev);
> > > > > >  }
> > > > > >
> > > > > >  /**
> > > > > > --
> > > > > > 2.13.3
> > > > >
> > > > > I have few concerns on this, mlx5_pci_probe() will also probe
> > > > > any under layer verbs device, and in a near future the
> > > > > representors associated to a VF.
> > > > > Making such detection should only be done once by the PF, I also
> > > > > wander if it is possible to make such drop action in a
> > > > > representor directly using Verbs.
> > > >
> > > > Then there should be some work to disable flows in representors?
> > > > that supposed to cover this.
> > >
> > > The code raising another Verbs device is already present and since
> > > the first entrance of this PMD in the DPDK tree, you must respect
> > > the code already present.
> > > This request is not related directly to a new feature but to an
> > > existing one, the representors being just an example.
> > >
> > > This detection should be only done once and not for each of them.
> >
> > Could you please elaborate on "under layer verbs device" and how to
> > judge dependency to PF, is there a probe order between them?
> 
> The place where you are adding the mlx5_flow_create_drop_queue() in
> mlx5_pci_probe() is probing any device returned by the verbs
> ibv_get_device_list().
> 
> This code is also present in mlx4 where for a single PCI id there are 2
> physical ports.
> 
> If the NIC handle several ibvdev (Verbs devices) it will create a new
> rte_eth_dev for each one.
> 
> > BTW, VF representor code seems exists in 17.11, not upstream.
> >
> > >
> > > > > Another concern is, this patch will be reverted in some time
> > > > > when those
> > > > > 16 priority will be always available.  It will be easier to
> > > > > remove this detection function than searching for all those
> modifications.
> > > > >
> > > > > I would suggest to have a standalone
> > > > > mlx5_flow_priorities_detect() which creates and deletes all
> resources needed for this detection.
> > > >
> > > > There is an upcoming new feature to support priorities more than
> > > > 16, auto detection will be kept IMHO.
> > >
> > > Until the final values of priorities will be backported to all
> > > kernels we support.  You don't see far enough in the future.
> > >
> > > > Besides, there will be a bundle of resource creation and removal
> > > > in this standalone function, I'm not sure it valuable to duplicate
> > > > them, please refer to function mlx5_flow_create_drop_queue().
> > >
> > > You misunderstood, I am not asking you to not use the default drop
> > > queues but instead of making an rte_flow attributes, items and
> > > actions to make directly the Verbs specification on the stack.  It
> > > will be faster than making a bunch of conversions (relying on
> > > malloc) from rte_flow to Verbs whereas you know exactly what it needs
> i.e. 1 spec.
> >
> > Sorry, still confused, mlx5_flow_priorities_detect() invokes
> > ibv_destroy_flow(), not rte_flow stuff, no malloc at all. BTW, mlx5
> > flow api bypass verb flow in offline mode, we can't use it to create
> flows at such stage.
> 
> Sorry I was the one confused. Priority detection is Ok.
> 
> After reading this, I'll suggest to use a boolean in the
> mlx5_pci_probe() to only make this detection once, the underlying verbs
> devices will inherit from such knowledge and adjust their own shift
> accordingly.
> 
> What do you think?

Finally got it; fortunately, Shahaf had a similar suggestion in the 17.11
review. I'll make mlx5_flow_priorities_detect() simply return the number of
supported priorities, and that number will be used so the detection runs only
once in the probe loop. Many thanks for your suggestion, I was interpreting it
in an overly complex way.

BTW, if no other comments on this series, I'll upload a new version.
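
A minimal sketch of that direction (assuming mlx5_flow_priorities_detect() is
reworked to return the detected count, and reusing the fields and macro from
this patch; the helper name is hypothetical):

/* Run the detection on the first port only and derive the per-port shift
 * from the cached result afterwards. */
static void
example_config_priorities(struct rte_eth_dev *eth_dev, uint32_t *cached)
{
	struct priv *priv = eth_dev->data->dev_private;

	if (*cached == 0) /* first port of this PCI device */
		*cached = mlx5_flow_priorities_detect(eth_dev);
	priv->config.flow_priority_shift =
		(*cached >= 2 * MLX5_VERBS_FLOW_PRIO_8) ?
		MLX5_VERBS_FLOW_PRIO_8 / 2 : 1;
	priv->config.control_flow_priority = 1;
}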

> 
> --
> Nélio Laranjeiro
> 6WIND
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index cfab55897..a1f2799e5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -197,6 +197,7 @@  mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_flow_delete_drop_queue(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
 		claim_zero(mlx5_glue->dealloc_pd(priv->pd));
@@ -993,6 +994,15 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		mlx5_set_link_up(eth_dev);
 		/* Store device configuration on private structure. */
 		priv->config = config;
+		/* Create drop queue. */
+		err = mlx5_flow_create_drop_queue(eth_dev);
+		if (err) {
+			DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
+				eth_dev->data->port_id, strerror(rte_errno));
+			goto port_error;
+		}
+		/* Supported flow priority number detection. */
+		mlx5_flow_priorities_detect(eth_dev);
 		continue;
 port_error:
 		if (priv)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 63b24e6bb..708272f6d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -89,6 +89,8 @@  struct mlx5_dev_config {
 	unsigned int rx_vec_en:1; /* Rx vector is enabled. */
 	unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+	unsigned int flow_priority_shift; /* Non-tunnel flow priority shift. */
+	unsigned int control_flow_priority; /* Control flow priority. */
 	unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
 	int txq_inline; /* Maximum packet size for inlining. */
@@ -105,6 +107,11 @@  enum mlx5_verbs_alloc_type {
 	MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
 };
 
+/* 8 Verbs priorities per flow. */
+#define MLX5_VERBS_FLOW_PRIO_8 8
+/* 4 Verbs priorities per flow. */
+#define MLX5_VERBS_FLOW_PRIO_4 4
+
 /**
  * Verbs allocator needs a context to know in the callback which kind of
  * resources it is allocating.
@@ -253,6 +260,7 @@  int mlx5_traffic_restart(struct rte_eth_dev *dev);
 
 /* mlx5_flow.c */
 
+void mlx5_flow_priorities_detect(struct rte_eth_dev *dev);
 int mlx5_flow_validate(struct rte_eth_dev *dev,
 		       const struct rte_flow_attr *attr,
 		       const struct rte_flow_item items[],
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 288610620..394760418 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -32,9 +32,6 @@ 
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
 
-/* Define minimal priority for control plane flows. */
-#define MLX5_CTRL_FLOW_PRIORITY 4
-
 /* Internet Protocol versions. */
 #define MLX5_IPV4 4
 #define MLX5_IPV6 6
@@ -129,7 +126,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_TCP |
 				IBV_RX_HASH_DST_PORT_TCP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_UDPV4] = {
@@ -138,7 +135,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_UDP |
 				IBV_RX_HASH_DST_PORT_UDP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_IPV4] = {
@@ -146,7 +143,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_DST_IPV4),
 		.dpdk_rss_hf = (ETH_RSS_IPV4 |
 				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 2,
+		.flow_priority = 1,
 		.ip_version = MLX5_IPV4,
 	},
 	[HASH_RXQ_TCPV6] = {
@@ -155,7 +152,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_TCP |
 				IBV_RX_HASH_DST_PORT_TCP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_UDPV6] = {
@@ -164,7 +161,7 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_SRC_PORT_UDP |
 				IBV_RX_HASH_DST_PORT_UDP),
 		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 1,
+		.flow_priority = 0,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_IPV6] = {
@@ -172,13 +169,13 @@  const struct hash_rxq_init hash_rxq_init[] = {
 				IBV_RX_HASH_DST_IPV6),
 		.dpdk_rss_hf = (ETH_RSS_IPV6 |
 				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 2,
+		.flow_priority = 1,
 		.ip_version = MLX5_IPV6,
 	},
 	[HASH_RXQ_ETH] = {
 		.hash_fields = 0,
 		.dpdk_rss_hf = 0,
-		.flow_priority = 3,
+		.flow_priority = 2,
 	},
 };
 
@@ -536,6 +533,8 @@  mlx5_flow_item_validate(const struct rte_flow_item *item,
 /**
  * Extract attribute to the parser.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[out] error
@@ -545,9 +544,12 @@  mlx5_flow_item_validate(const struct rte_flow_item *item,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
+mlx5_flow_convert_attributes(struct rte_eth_dev *dev,
+			     const struct rte_flow_attr *attr,
 			     struct rte_flow_error *error)
 {
+	struct priv *priv = dev->data->dev_private;
+
 	if (attr->group) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -555,7 +557,7 @@  mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
 				   "groups are not supported");
 		return -rte_errno;
 	}
-	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
+	if (attr->priority > priv->config.control_flow_priority) {
 		rte_flow_error_set(error, ENOTSUP,
 				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
 				   NULL,
@@ -900,30 +902,38 @@  mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
  * Make inner packet matching with an higher priority from the non Inner
  * matching.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in, out] parser
  *   Internal parser structure.
  * @param attr
  *   User flow attribute.
  */
 static void
-mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
+mlx5_flow_update_priority(struct rte_eth_dev *dev,
+			  struct mlx5_flow_parse *parser,
 			  const struct rte_flow_attr *attr)
 {
+	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
+	uint16_t priority;
 
+	if (priv->config.flow_priority_shift == 1)
+		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_4;
+	else
+		priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
+	if (!parser->inner)
+		priority += priv->config.flow_priority_shift;
 	if (parser->drop) {
-		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
-			attr->priority +
-			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+		parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
+				hash_rxq_init[HASH_RXQ_ETH].flow_priority;
 		return;
 	}
 	for (i = 0; i != hash_rxq_init_n; ++i) {
-		if (parser->queue[i].ibv_attr) {
-			parser->queue[i].ibv_attr->priority =
-				attr->priority +
-				hash_rxq_init[i].flow_priority -
-				(parser->inner ? 1 : 0);
-		}
+		if (!parser->queue[i].ibv_attr)
+			continue;
+		parser->queue[i].ibv_attr->priority = priority +
+				hash_rxq_init[i].flow_priority;
 	}
 }
 
@@ -1087,7 +1097,7 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 		.layer = HASH_RXQ_ETH,
 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
-	ret = mlx5_flow_convert_attributes(attr, error);
+	ret = mlx5_flow_convert_attributes(dev, attr, error);
 	if (ret)
 		return ret;
 	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
@@ -1158,7 +1168,7 @@  mlx5_flow_convert(struct rte_eth_dev *dev,
 	 */
 	if (!parser->drop)
 		mlx5_flow_convert_finalise(parser);
-	mlx5_flow_update_priority(parser, attr);
+	mlx5_flow_update_priority(dev, parser, attr);
 exit_free:
 	/* Only verification is expected, all resources should be released. */
 	if (!parser->create) {
@@ -2450,7 +2460,7 @@  mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 	const struct rte_flow_attr attr = {
 		.ingress = 1,
-		.priority = MLX5_CTRL_FLOW_PRIORITY,
+		.priority = priv->config.control_flow_priority,
 	};
 	struct rte_flow_item items[] = {
 		{
@@ -3161,3 +3171,50 @@  mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	}
 	return 0;
 }
+
+/**
+ * Detect number of Verbs flow priorities supported.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ */
+void
+mlx5_flow_priorities_detect(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+	uint32_t verb_priorities = MLX5_VERBS_FLOW_PRIO_8 * 2;
+	struct {
+		struct ibv_flow_attr attr;
+		struct ibv_flow_spec_eth eth;
+		struct ibv_flow_spec_action_drop drop;
+	} flow_attr = {
+		.attr = {
+			.num_of_specs = 2,
+			.priority = verb_priorities - 1,
+		},
+		.eth = {
+			.type = IBV_FLOW_SPEC_ETH,
+			.size = sizeof(struct ibv_flow_spec_eth),
+		},
+		.drop = {
+			.size = sizeof(struct ibv_flow_spec_action_drop),
+			.type = IBV_FLOW_SPEC_ACTION_DROP,
+		},
+	};
+	struct ibv_flow *flow;
+
+	if (priv->config.control_flow_priority)
+		return;
+	flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
+				      &flow_attr.attr);
+	if (flow) {
+		priv->config.flow_priority_shift = MLX5_VERBS_FLOW_PRIO_8 / 2;
+		claim_zero(mlx5_glue->destroy_flow(flow));
+	} else {
+		priv->config.flow_priority_shift = 1;
+		verb_priorities = verb_priorities / 2;
+	}
+	priv->config.control_flow_priority = 1;
+	DRV_LOG(INFO, "port %u Verbs flow priorities: %d",
+		dev->data->port_id, verb_priorities);
+}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 6bb4ffb14..d80a2e688 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -148,12 +148,6 @@  mlx5_dev_start(struct rte_eth_dev *dev)
 	int ret;
 
 	dev->data->dev_started = 1;
-	ret = mlx5_flow_create_drop_queue(dev);
-	if (ret) {
-		DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
-			dev->data->port_id, strerror(rte_errno));
-		goto error;
-	}
 	DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
 		dev->data->port_id);
 	rte_mempool_walk(mlx5_mp2mr_iter, priv);
@@ -202,7 +196,6 @@  mlx5_dev_start(struct rte_eth_dev *dev)
 	mlx5_traffic_disable(dev);
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
-	mlx5_flow_delete_drop_queue(dev);
 	rte_errno = ret; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -237,7 +230,6 @@  mlx5_dev_stop(struct rte_eth_dev *dev)
 	mlx5_rxq_stop(dev);
 	for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
 		mlx5_mr_release(mr);
-	mlx5_flow_delete_drop_queue(dev);
 }
 
 /**