[v6,8/9] ethdev: representor iterator compare complete info

Message ID 1613272907-22563-9-git-send-email-xuemingl@nvidia.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series ethdev: support SubFunction representor |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Xueming Li Feb. 14, 2021, 3:21 a.m. UTC
  The NIC can have multiple PCIe links and can be attached to multiple
hosts, for example the same single NIC can be shared for multiple server
units in the rack. On each PCIe link NIC can provide multiple PFs and
VFs/SFs based on these ones. The full representor identifier consists of
three indices - controller index, PF index, and VF or SF index (if any).

SR-IOV and SubFunction are created on top of PF. PF index is introduced
because there might be multiple PFs in the bonding configuration and
only bonding device is probed.

In eth representor comparator callback, ethdev representor ID was
compared with devarg. Since controller index and PF index not compared,
callback returned representor from other PF or controller.

This patch adds new API to convert representor controller, pf and vf/sf
index to representor ID. Representor comparer callback convert
representor info into ID and compare with device representor ID.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
 lib/librte_ethdev/ethdev_driver.h | 32 ++++++++++++
 lib/librte_ethdev/rte_class_eth.c | 38 ++++++++++----
 lib/librte_ethdev/rte_ethdev.c    | 83 +++++++++++++++++++++++++++++++
 lib/librte_ethdev/version.map     |  1 +
 4 files changed, 145 insertions(+), 9 deletions(-)
  

Comments

Andrew Rybchenko Feb. 15, 2021, 9:31 a.m. UTC | #1
On 2/14/21 6:21 AM, Xueming Li wrote:
> The NIC can have multiple PCIe links and can be attached to multiple
> hosts, for example the same single NIC can be shared for multiple server
> units in the rack. On each PCIe link NIC can provide multiple PFs and
> VFs/SFs based on these ones. The full representor identifier consists of
> three indices - controller index, PF index, and VF or SF index (if any).
> 
> SR-IOV and SubFunction are created on top of PF. PF index is introduced
> because there might be multiple PFs in the bonding configuration and
> only bonding device is probed.
> 
> In eth representor comparator callback, ethdev representor ID was
> compared with devarg. Since controller index and PF index not compared,
> callback returned representor from other PF or controller.
> 
> This patch adds new API to convert representor controller, pf and vf/sf
> index to representor ID. Representor comparer callback convert
> representor info into ID and compare with device representor ID.
> 
> Signed-off-by: Xueming Li <xuemingl@nvidia.com>
> ---
>  lib/librte_ethdev/ethdev_driver.h | 32 ++++++++++++
>  lib/librte_ethdev/rte_class_eth.c | 38 ++++++++++----
>  lib/librte_ethdev/rte_ethdev.c    | 83 +++++++++++++++++++++++++++++++
>  lib/librte_ethdev/version.map     |  1 +
>  4 files changed, 145 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_ethdev/ethdev_driver.h b/lib/librte_ethdev/ethdev_driver.h
> index abcbc3112d..23342f1be2 100644
> --- a/lib/librte_ethdev/ethdev_driver.h
> +++ b/lib/librte_ethdev/ethdev_driver.h
> @@ -1243,6 +1243,38 @@ struct rte_eth_devargs {
>  	enum rte_eth_representor_type type; /* type of representor */
>  };
>  
> +/**
> + * PMD helper function to convert representor ID from location detail
> + *
> + * Convert representor ID from controller, pf and (sf or vf).
> + * The mapping is retrieved from rte_eth_representor_info_get().
> + *
> + * If PMD doesn't return representor range info, simply ignore controller
> + * and pf to keep backward compatibility.

It does not sound right. If controller and/or pf is specified,
it must not be ignored.

> + *
> + * @param ethdev
> + *  Handle of ethdev port.
> + * @param id

May I suggest to name it 'repr_id' to make it less ambguous.

> + *  Pointer to converted representor ID.

I'd prefer do not mix in and out paramters. I suggest to make
it the last parameter.

> + * @param type
> + *  Representor type.
> + * @param controller
> + *  Controller ID, -1 if unspecified.
> + * @param pf
> + *  PF port ID, -1 if unspecified.
> + * @param representor_port
> + *  Representor port ID, -1 if unspecified.

Not sure that I understand what is it? Is it vf or sf number?

> + *
> + * @return
> + *  Negative errno value on error, 0 on success.
> + */
> +__rte_internal
> +int
> +rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
> +			       uint16_t *id,
> +			       enum rte_eth_representor_type type,
> +			       int controller, int pf, int representor_port);
> +
>  /**
>   * PMD helper function to parse ethdev arguments
>   *
> diff --git a/lib/librte_ethdev/rte_class_eth.c b/lib/librte_ethdev/rte_class_eth.c
> index 051c892b40..f7b7e659e7 100644
> --- a/lib/librte_ethdev/rte_class_eth.c
> +++ b/lib/librte_ethdev/rte_class_eth.c
> @@ -65,9 +65,10 @@ eth_representor_cmp(const char *key __rte_unused,
>  {
>  	int ret;
>  	char *values;
> -	const struct rte_eth_dev_data *data = opaque;
> -	struct rte_eth_devargs representors;
> -	uint16_t index;
> +	const struct rte_eth_dev *edev = opaque;
> +	const struct rte_eth_dev_data *data = edev->data;
> +	struct rte_eth_devargs eth_da;
> +	uint16_t id, nc, np, nf, i, c, p, f;
>  
>  	if ((data->dev_flags & RTE_ETH_DEV_REPRESENTOR) == 0)
>  		return -1; /* not a representor port */
> @@ -76,17 +77,36 @@ eth_representor_cmp(const char *key __rte_unused,
>  	values = strdup(value);
>  	if (values == NULL)
>  		return -1;
> -	memset(&representors, 0, sizeof(representors));
> -	ret = rte_eth_devargs_parse_representor_ports(values, &representors);
> +	memset(&eth_da, 0, sizeof(eth_da));
> +	ret = rte_eth_devargs_parse_representor_ports(values, &eth_da);
>  	free(values);
>  	if (ret != 0)
>  		return -1; /* invalid devargs value */
>  
> +	if (eth_da.nb_mh_controllers == 0 && eth_da.nb_ports == 0 &&
> +	    eth_da.nb_representor_ports == 0)
> +		return -1;
> +	nc = eth_da.nb_mh_controllers > 0 ? eth_da.nb_mh_controllers : 1;
> +	np = eth_da.nb_ports > 0 ? eth_da.nb_ports : 1;
> +	nf = eth_da.nb_representor_ports > 0 ? eth_da.nb_representor_ports : 1;
> +
>  	/* Return 0 if representor id is matching one of the values. */
> -	for (index = 0; index < representors.nb_representor_ports; index++)
> -		if (data->representor_id ==
> -				representors.representor_ports[index])
> +	for (i = 0; i < nc * np * nf; ++i) {
> +		c = i / (np * nf);
> +		p = (i / nf) % np;
> +		f = i % nf;
> +		if (rte_eth_representor_id_convert(edev,
> +			&id,
> +			eth_da.type,
> +			eth_da.nb_mh_controllers == 0 ? -1 :
> +					eth_da.mh_controllers[c],
> +			eth_da.nb_ports == 0 ? -1 : eth_da.ports[p],
> +			eth_da.nb_representor_ports == 0 ? -1 :
> +					eth_da.representor_ports[f]) < 0)
> +			continue;
> +		if (data->representor_id == id)
>  			return 0;
> +	}
>  	return -1; /* no match */
>  }
>  
> @@ -112,7 +132,7 @@ eth_dev_match(const struct rte_eth_dev *edev,
>  
>  	ret = rte_kvargs_process(kvlist,
>  			eth_params_keys[RTE_ETH_PARAM_REPRESENTOR],
> -			eth_representor_cmp, edev->data);
> +			eth_representor_cmp, (void *)(uintptr_t)edev);
>  	if (ret != 0)
>  		return -1;
>  	/* search for representor key */
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index 07c6debb58..da0cf1a920 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -5617,6 +5617,89 @@ rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
>  	return result;
>  }
>  
> +int
> +rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
> +			       uint16_t *id,
> +			       enum rte_eth_representor_type type,
> +			       int controller, int pf, int representor_port)
> +{
> +	int ret, n, i, count;
> +	struct rte_eth_representor_info *info = NULL;
> +	size_t size;
> +
> +	if (type == RTE_ETH_REPRESENTOR_NONE)
> +		return 0;
> +	if (id == NULL)
> +		return -EINVAL;
> +
> +	/* Get PMD representor range info. */
> +	ret = rte_eth_representor_info_get(ethdev->data->port_id, NULL);
> +	if (ret < 0) {
> +		/* Fallback to direct mapping for compatibility. */
> +		*id = representor_port;

I think it is a bad behaviour as I stated above. It is only if
and only if type is VF, controller and PF are unspecified and
representor_port is specified.

> +	}
> +	n = ret;
> +	size = sizeof(*info) + n * sizeof(info->ranges[0]);
> +	info = calloc(1, size);
> +	if (info == NULL)
> +		return -ENOMEM;
> +	ret = rte_eth_representor_info_get(ethdev->data->port_id, info);
> +	if (ret < 0)
> +		goto out;
> +
> +	/* Default controller and pf to caller. */
> +	if (controller == -1)
> +		controller = info->controller;
> +	if (pf == -1)
> +		pf = info->pf;
> +
> +	/* Locate representor ID. */
> +	for (i = 0; i < n; ++i) {
> +		if (info->ranges[i].type != type)
> +			continue;
> +		/* PMD hit: ignore controller if -1. */
> +		if (info->ranges[i].controller != -1 &&
> +		    info->ranges[i].controller != (uint16_t)controller)
> +			continue;

I think it is incorrect to ignore controller in range
if controller is specified in request. It must match.

> +		count = info->ranges[i].id_end - info->ranges[i].id_base + 1;
> +		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_PF) {
> +			/* PF. */
> +			if (pf >= info->ranges[i].pf + count)
> +				continue;
> +			*id = info->ranges[i].id_base +
> +			      (pf - info->ranges[i].pf);
> +			goto out;
> +		}
> +		/* VF or SF. */
> +		/* PMD hit: ignore pf if -1. */
> +		if (info->ranges[i].pf != -1 &&
> +		    info->ranges[i].pf != (uint16_t)pf)
> +			continue;

Same for PF.

> +		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_VF) {

Typically switch/case looks a bit a bitter for such code.

> +			/* VF. */

The comment is useless

> +			if (representor_port >= info->ranges[i].vf + count)
> +				continue;
> +			*id = info->ranges[i].id_base +
> +			      (representor_port - info->ranges[i].vf);
> +			goto out;
> +		} else if (info->ranges[i].type == RTE_ETH_REPRESENTOR_SF) {
> +			/* SF. */

The comment is useless

> +			if (representor_port >= info->ranges[i].sf + count)
> +				continue;
> +			*id = info->ranges[i].id_base +
> +			      (representor_port - info->ranges[i].sf);
> +			goto out;
> +		}
> +	}
> +	/* Not matching representor ID range. */
> +	ret = -ENOENT;
> +
> +out:
> +	if (info != NULL)
> +		free(info);

There is no necessity to check against NULL above, free() does
it in any case.

> +	return ret;
> +}
> +
>  static int
>  eth_dev_handle_port_list(const char *cmd __rte_unused,
>  		const char *params __rte_unused,
> diff --git a/lib/librte_ethdev/version.map b/lib/librte_ethdev/version.map
> index bb6f7436c2..2891f5734e 100644
> --- a/lib/librte_ethdev/version.map
> +++ b/lib/librte_ethdev/version.map
> @@ -268,6 +268,7 @@ INTERNAL {
>  	rte_eth_hairpin_queue_peer_bind;
>  	rte_eth_hairpin_queue_peer_unbind;
>  	rte_eth_hairpin_queue_peer_update;
> +	rte_eth_representor_id_convert;
>  	rte_eth_switch_domain_alloc;
>  	rte_eth_switch_domain_free;
>  };
>
  
Xueming Li Feb. 16, 2021, 4:35 p.m. UTC | #2
>-----Original Message-----
>From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>Sent: Monday, February 15, 2021 5:32 PM
>To: Xueming(Steven) Li <xuemingl@nvidia.com>
>Cc: dev@dpdk.org; Slava Ovsiienko <viacheslavo@nvidia.com>; Asaf Penso <asafp@nvidia.com>; NBU-Contact-Thomas Monjalon
><thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
><nhorman@tuxdriver.com>
>Subject: Re: [PATCH v6 8/9] ethdev: representor iterator compare complete info
>
>On 2/14/21 6:21 AM, Xueming Li wrote:
>> The NIC can have multiple PCIe links and can be attached to multiple
>> hosts, for example the same single NIC can be shared for multiple
>> server units in the rack. On each PCIe link NIC can provide multiple
>> PFs and VFs/SFs based on these ones. The full representor identifier
>> consists of three indices - controller index, PF index, and VF or SF index (if any).
>>
>> SR-IOV and SubFunction are created on top of PF. PF index is
>> introduced because there might be multiple PFs in the bonding
>> configuration and only bonding device is probed.
>>
>> In eth representor comparator callback, ethdev representor ID was
>> compared with devarg. Since controller index and PF index not
>> compared, callback returned representor from other PF or controller.
>>
>> This patch adds new API to convert representor controller, pf and
>> vf/sf index to representor ID. Representor comparer callback convert
>> representor info into ID and compare with device representor ID.
>>
>> Signed-off-by: Xueming Li <xuemingl@nvidia.com>
>> ---
>>  lib/librte_ethdev/ethdev_driver.h | 32 ++++++++++++
>> lib/librte_ethdev/rte_class_eth.c | 38 ++++++++++----
>>  lib/librte_ethdev/rte_ethdev.c    | 83 +++++++++++++++++++++++++++++++
>>  lib/librte_ethdev/version.map     |  1 +
>>  4 files changed, 145 insertions(+), 9 deletions(-)
>>
>> diff --git a/lib/librte_ethdev/ethdev_driver.h
>> b/lib/librte_ethdev/ethdev_driver.h
>> index abcbc3112d..23342f1be2 100644
>> --- a/lib/librte_ethdev/ethdev_driver.h
>> +++ b/lib/librte_ethdev/ethdev_driver.h
>> @@ -1243,6 +1243,38 @@ struct rte_eth_devargs {
>>  	enum rte_eth_representor_type type; /* type of representor */  };
>>
>> +/**
>> + * PMD helper function to convert representor ID from location detail
>> + *
>> + * Convert representor ID from controller, pf and (sf or vf).
>> + * The mapping is retrieved from rte_eth_representor_info_get().
>> + *
>> + * If PMD doesn't return representor range info, simply ignore
>> +controller
>> + * and pf to keep backward compatibility.
>
>It does not sound right. If controller and/or pf is specified, it must not be ignored.

Okay, return error if info not found.

>
>> + *
>> + * @param ethdev
>> + *  Handle of ethdev port.
>> + * @param id
>
>May I suggest to name it 'repr_id' to make it less ambguous.

Accept.

>
>> + *  Pointer to converted representor ID.
>
>I'd prefer do not mix in and out paramters. I suggest to make it the last parameter.
>
>> + * @param type
>> + *  Representor type.
>> + * @param controller
>> + *  Controller ID, -1 if unspecified.
>> + * @param pf
>> + *  PF port ID, -1 if unspecified.
>> + * @param representor_port
>> + *  Representor port ID, -1 if unspecified.
>
>Not sure that I understand what is it? Is it vf or sf number?

Yes, will make it clear.

>
>> + *
>> + * @return
>> + *  Negative errno value on error, 0 on success.
>> + */
>> +__rte_internal
>> +int
>> +rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
>> +			       uint16_t *id,
>> +			       enum rte_eth_representor_type type,
>> +			       int controller, int pf, int representor_port);
>> +
>>  /**
>>   * PMD helper function to parse ethdev arguments
>>   *
>> diff --git a/lib/librte_ethdev/rte_class_eth.c
>> b/lib/librte_ethdev/rte_class_eth.c
>> index 051c892b40..f7b7e659e7 100644
>> --- a/lib/librte_ethdev/rte_class_eth.c
>> +++ b/lib/librte_ethdev/rte_class_eth.c
>> @@ -65,9 +65,10 @@ eth_representor_cmp(const char *key __rte_unused,
>> {
>>  	int ret;
>>  	char *values;
>> -	const struct rte_eth_dev_data *data = opaque;
>> -	struct rte_eth_devargs representors;
>> -	uint16_t index;
>> +	const struct rte_eth_dev *edev = opaque;
>> +	const struct rte_eth_dev_data *data = edev->data;
>> +	struct rte_eth_devargs eth_da;
>> +	uint16_t id, nc, np, nf, i, c, p, f;
>>
>>  	if ((data->dev_flags & RTE_ETH_DEV_REPRESENTOR) == 0)
>>  		return -1; /* not a representor port */ @@ -76,17 +77,36 @@
>> eth_representor_cmp(const char *key __rte_unused,
>>  	values = strdup(value);
>>  	if (values == NULL)
>>  		return -1;
>> -	memset(&representors, 0, sizeof(representors));
>> -	ret = rte_eth_devargs_parse_representor_ports(values, &representors);
>> +	memset(&eth_da, 0, sizeof(eth_da));
>> +	ret = rte_eth_devargs_parse_representor_ports(values, &eth_da);
>>  	free(values);
>>  	if (ret != 0)
>>  		return -1; /* invalid devargs value */
>>
>> +	if (eth_da.nb_mh_controllers == 0 && eth_da.nb_ports == 0 &&
>> +	    eth_da.nb_representor_ports == 0)
>> +		return -1;
>> +	nc = eth_da.nb_mh_controllers > 0 ? eth_da.nb_mh_controllers : 1;
>> +	np = eth_da.nb_ports > 0 ? eth_da.nb_ports : 1;
>> +	nf = eth_da.nb_representor_ports > 0 ? eth_da.nb_representor_ports :
>> +1;
>> +
>>  	/* Return 0 if representor id is matching one of the values. */
>> -	for (index = 0; index < representors.nb_representor_ports; index++)
>> -		if (data->representor_id ==
>> -				representors.representor_ports[index])
>> +	for (i = 0; i < nc * np * nf; ++i) {
>> +		c = i / (np * nf);
>> +		p = (i / nf) % np;
>> +		f = i % nf;
>> +		if (rte_eth_representor_id_convert(edev,
>> +			&id,
>> +			eth_da.type,
>> +			eth_da.nb_mh_controllers == 0 ? -1 :
>> +					eth_da.mh_controllers[c],
>> +			eth_da.nb_ports == 0 ? -1 : eth_da.ports[p],
>> +			eth_da.nb_representor_ports == 0 ? -1 :
>> +					eth_da.representor_ports[f]) < 0)
>> +			continue;
>> +		if (data->representor_id == id)
>>  			return 0;
>> +	}
>>  	return -1; /* no match */
>>  }
>>
>> @@ -112,7 +132,7 @@ eth_dev_match(const struct rte_eth_dev *edev,
>>
>>  	ret = rte_kvargs_process(kvlist,
>>  			eth_params_keys[RTE_ETH_PARAM_REPRESENTOR],
>> -			eth_representor_cmp, edev->data);
>> +			eth_representor_cmp, (void *)(uintptr_t)edev);
>>  	if (ret != 0)
>>  		return -1;
>>  	/* search for representor key */
>> diff --git a/lib/librte_ethdev/rte_ethdev.c
>> b/lib/librte_ethdev/rte_ethdev.c index 07c6debb58..da0cf1a920 100644
>> --- a/lib/librte_ethdev/rte_ethdev.c
>> +++ b/lib/librte_ethdev/rte_ethdev.c
>> @@ -5617,6 +5617,89 @@ rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
>>  	return result;
>>  }
>>
>> +int
>> +rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
>> +			       uint16_t *id,
>> +			       enum rte_eth_representor_type type,
>> +			       int controller, int pf, int representor_port) {
>> +	int ret, n, i, count;
>> +	struct rte_eth_representor_info *info = NULL;
>> +	size_t size;
>> +
>> +	if (type == RTE_ETH_REPRESENTOR_NONE)
>> +		return 0;
>> +	if (id == NULL)
>> +		return -EINVAL;
>> +
>> +	/* Get PMD representor range info. */
>> +	ret = rte_eth_representor_info_get(ethdev->data->port_id, NULL);
>> +	if (ret < 0) {
>> +		/* Fallback to direct mapping for compatibility. */
>> +		*id = representor_port;
>
>I think it is a bad behaviour as I stated above. It is only if and only if type is VF, controller and PF are unspecified and representor_port
>is specified.

Agree, will make it only valid for legacy VF representor.

>
>> +	}
>> +	n = ret;
>> +	size = sizeof(*info) + n * sizeof(info->ranges[0]);
>> +	info = calloc(1, size);
>> +	if (info == NULL)
>> +		return -ENOMEM;
>> +	ret = rte_eth_representor_info_get(ethdev->data->port_id, info);
>> +	if (ret < 0)
>> +		goto out;
>> +
>> +	/* Default controller and pf to caller. */
>> +	if (controller == -1)
>> +		controller = info->controller;
>> +	if (pf == -1)
>> +		pf = info->pf;
>> +
>> +	/* Locate representor ID. */
>> +	for (i = 0; i < n; ++i) {
>> +		if (info->ranges[i].type != type)
>> +			continue;
>> +		/* PMD hit: ignore controller if -1. */
>> +		if (info->ranges[i].controller != -1 &&
>> +		    info->ranges[i].controller != (uint16_t)controller)
>> +			continue;
>
>I think it is incorrect to ignore controller in range if controller is specified in request. It must match.

This is a real requirement I'm facing from orchestration, before orchestration having knowledge on port
status, it always send "pf#vf#" to OVS->DPDK, although "pf#" is meaningless in standard mode. It will take
time for orchestration and OVS to evolve... PMD uses this option to decide ignore them or not.

>
>> +		count = info->ranges[i].id_end - info->ranges[i].id_base + 1;
>> +		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_PF) {
>> +			/* PF. */
>> +			if (pf >= info->ranges[i].pf + count)
>> +				continue;
>> +			*id = info->ranges[i].id_base +
>> +			      (pf - info->ranges[i].pf);
>> +			goto out;
>> +		}
>> +		/* VF or SF. */
>> +		/* PMD hit: ignore pf if -1. */
>> +		if (info->ranges[i].pf != -1 &&
>> +		    info->ranges[i].pf != (uint16_t)pf)
>> +			continue;
>
>Same for PF.
>
>> +		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_VF) {
>
>Typically switch/case looks a bit a bitter for such code.

'bitter'? :) will update.

>
>> +			/* VF. */
>
>The comment is useless

Accept this one and bellows.

>
>> +			if (representor_port >= info->ranges[i].vf + count)
>> +				continue;
>> +			*id = info->ranges[i].id_base +
>> +			      (representor_port - info->ranges[i].vf);
>> +			goto out;
>> +		} else if (info->ranges[i].type == RTE_ETH_REPRESENTOR_SF) {
>> +			/* SF. */
>
>The comment is useless
>
>> +			if (representor_port >= info->ranges[i].sf + count)
>> +				continue;
>> +			*id = info->ranges[i].id_base +
>> +			      (representor_port - info->ranges[i].sf);
>> +			goto out;
>> +		}
>> +	}
>> +	/* Not matching representor ID range. */
>> +	ret = -ENOENT;
>> +
>> +out:
>> +	if (info != NULL)
>> +		free(info);
>
>There is no necessity to check against NULL above, free() does it in any case.
>
>> +	return ret;
>> +}
>> +
>>  static int
>>  eth_dev_handle_port_list(const char *cmd __rte_unused,
>>  		const char *params __rte_unused,
>> diff --git a/lib/librte_ethdev/version.map
>> b/lib/librte_ethdev/version.map index bb6f7436c2..2891f5734e 100644
>> --- a/lib/librte_ethdev/version.map
>> +++ b/lib/librte_ethdev/version.map
>> @@ -268,6 +268,7 @@ INTERNAL {
>>  	rte_eth_hairpin_queue_peer_bind;
>>  	rte_eth_hairpin_queue_peer_unbind;
>>  	rte_eth_hairpin_queue_peer_update;
>> +	rte_eth_representor_id_convert;
>>  	rte_eth_switch_domain_alloc;
>>  	rte_eth_switch_domain_free;
>>  };
>>
  
Andrew Rybchenko Feb. 25, 2021, 7:32 a.m. UTC | #3
On 2/16/21 7:35 PM, Xueming(Steven) Li wrote:
> 
>> -----Original Message-----
>> From: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>> Sent: Monday, February 15, 2021 5:32 PM
>> To: Xueming(Steven) Li <xuemingl@nvidia.com>
>> Cc: dev@dpdk.org; Slava Ovsiienko <viacheslavo@nvidia.com>; Asaf Penso <asafp@nvidia.com>; NBU-Contact-Thomas Monjalon
>> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>; Ray Kinsella <mdr@ashroe.eu>; Neil Horman
>> <nhorman@tuxdriver.com>
>> Subject: Re: [PATCH v6 8/9] ethdev: representor iterator compare complete info
>>
>> On 2/14/21 6:21 AM, Xueming Li wrote:
>>> The NIC can have multiple PCIe links and can be attached to multiple
>>> hosts, for example the same single NIC can be shared for multiple
>>> server units in the rack. On each PCIe link NIC can provide multiple
>>> PFs and VFs/SFs based on these ones. The full representor identifier
>>> consists of three indices - controller index, PF index, and VF or SF index (if any).
>>>
>>> SR-IOV and SubFunction are created on top of PF. PF index is
>>> introduced because there might be multiple PFs in the bonding
>>> configuration and only bonding device is probed.
>>>
>>> In eth representor comparator callback, ethdev representor ID was
>>> compared with devarg. Since controller index and PF index not
>>> compared, callback returned representor from other PF or controller.
>>>
>>> This patch adds new API to convert representor controller, pf and
>>> vf/sf index to representor ID. Representor comparer callback convert
>>> representor info into ID and compare with device representor ID.
>>>
>>> Signed-off-by: Xueming Li <xuemingl@nvidia.com>

[snip]

>>> +	}
>>> +	n = ret;
>>> +	size = sizeof(*info) + n * sizeof(info->ranges[0]);
>>> +	info = calloc(1, size);
>>> +	if (info == NULL)
>>> +		return -ENOMEM;
>>> +	ret = rte_eth_representor_info_get(ethdev->data->port_id, info);
>>> +	if (ret < 0)
>>> +		goto out;
>>> +
>>> +	/* Default controller and pf to caller. */
>>> +	if (controller == -1)
>>> +		controller = info->controller;
>>> +	if (pf == -1)
>>> +		pf = info->pf;
>>> +
>>> +	/* Locate representor ID. */
>>> +	for (i = 0; i < n; ++i) {
>>> +		if (info->ranges[i].type != type)
>>> +			continue;
>>> +		/* PMD hit: ignore controller if -1. */
>>> +		if (info->ranges[i].controller != -1 &&
>>> +		    info->ranges[i].controller != (uint16_t)controller)
>>> +			continue;
>>
>> I think it is incorrect to ignore controller in range if controller is specified in request. It must match.
> 
> This is a real requirement I'm facing from orchestration, before orchestration having knowledge on port
> status, it always send "pf#vf#" to OVS->DPDK, although "pf#" is meaningless in standard mode. It will take
> time for orchestration and OVS to evolve... PMD uses this option to decide ignore them or not.

I'll take a look at it on the next version, but it still sounds
wrong when something is specified, but ignored. May be I simply
misunderstand.

Thanks for working on the patch series.
  

Patch

diff --git a/lib/librte_ethdev/ethdev_driver.h b/lib/librte_ethdev/ethdev_driver.h
index abcbc3112d..23342f1be2 100644
--- a/lib/librte_ethdev/ethdev_driver.h
+++ b/lib/librte_ethdev/ethdev_driver.h
@@ -1243,6 +1243,38 @@  struct rte_eth_devargs {
 	enum rte_eth_representor_type type; /* type of representor */
 };
 
+/**
+ * PMD helper function to convert representor ID from location detail
+ *
+ * Convert representor ID from controller, pf and (sf or vf).
+ * The mapping is retrieved from rte_eth_representor_info_get().
+ *
+ * If PMD doesn't return representor range info, simply ignore controller
+ * and pf to keep backward compatibility.
+ *
+ * @param ethdev
+ *  Handle of ethdev port.
+ * @param id
+ *  Pointer to converted representor ID.
+ * @param type
+ *  Representor type.
+ * @param controller
+ *  Controller ID, -1 if unspecified.
+ * @param pf
+ *  PF port ID, -1 if unspecified.
+ * @param representor_port
+ *  Representor port ID, -1 if unspecified.
+ *
+ * @return
+ *  Negative errno value on error, 0 on success.
+ */
+__rte_internal
+int
+rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
+			       uint16_t *id,
+			       enum rte_eth_representor_type type,
+			       int controller, int pf, int representor_port);
+
 /**
  * PMD helper function to parse ethdev arguments
  *
diff --git a/lib/librte_ethdev/rte_class_eth.c b/lib/librte_ethdev/rte_class_eth.c
index 051c892b40..f7b7e659e7 100644
--- a/lib/librte_ethdev/rte_class_eth.c
+++ b/lib/librte_ethdev/rte_class_eth.c
@@ -65,9 +65,10 @@  eth_representor_cmp(const char *key __rte_unused,
 {
 	int ret;
 	char *values;
-	const struct rte_eth_dev_data *data = opaque;
-	struct rte_eth_devargs representors;
-	uint16_t index;
+	const struct rte_eth_dev *edev = opaque;
+	const struct rte_eth_dev_data *data = edev->data;
+	struct rte_eth_devargs eth_da;
+	uint16_t id, nc, np, nf, i, c, p, f;
 
 	if ((data->dev_flags & RTE_ETH_DEV_REPRESENTOR) == 0)
 		return -1; /* not a representor port */
@@ -76,17 +77,36 @@  eth_representor_cmp(const char *key __rte_unused,
 	values = strdup(value);
 	if (values == NULL)
 		return -1;
-	memset(&representors, 0, sizeof(representors));
-	ret = rte_eth_devargs_parse_representor_ports(values, &representors);
+	memset(&eth_da, 0, sizeof(eth_da));
+	ret = rte_eth_devargs_parse_representor_ports(values, &eth_da);
 	free(values);
 	if (ret != 0)
 		return -1; /* invalid devargs value */
 
+	if (eth_da.nb_mh_controllers == 0 && eth_da.nb_ports == 0 &&
+	    eth_da.nb_representor_ports == 0)
+		return -1;
+	nc = eth_da.nb_mh_controllers > 0 ? eth_da.nb_mh_controllers : 1;
+	np = eth_da.nb_ports > 0 ? eth_da.nb_ports : 1;
+	nf = eth_da.nb_representor_ports > 0 ? eth_da.nb_representor_ports : 1;
+
 	/* Return 0 if representor id is matching one of the values. */
-	for (index = 0; index < representors.nb_representor_ports; index++)
-		if (data->representor_id ==
-				representors.representor_ports[index])
+	for (i = 0; i < nc * np * nf; ++i) {
+		c = i / (np * nf);
+		p = (i / nf) % np;
+		f = i % nf;
+		if (rte_eth_representor_id_convert(edev,
+			&id,
+			eth_da.type,
+			eth_da.nb_mh_controllers == 0 ? -1 :
+					eth_da.mh_controllers[c],
+			eth_da.nb_ports == 0 ? -1 : eth_da.ports[p],
+			eth_da.nb_representor_ports == 0 ? -1 :
+					eth_da.representor_ports[f]) < 0)
+			continue;
+		if (data->representor_id == id)
 			return 0;
+	}
 	return -1; /* no match */
 }
 
@@ -112,7 +132,7 @@  eth_dev_match(const struct rte_eth_dev *edev,
 
 	ret = rte_kvargs_process(kvlist,
 			eth_params_keys[RTE_ETH_PARAM_REPRESENTOR],
-			eth_representor_cmp, edev->data);
+			eth_representor_cmp, (void *)(uintptr_t)edev);
 	if (ret != 0)
 		return -1;
 	/* search for representor key */
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 07c6debb58..da0cf1a920 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -5617,6 +5617,89 @@  rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
 	return result;
 }
 
+int
+rte_eth_representor_id_convert(const struct rte_eth_dev *ethdev,
+			       uint16_t *id,
+			       enum rte_eth_representor_type type,
+			       int controller, int pf, int representor_port)
+{
+	int ret, n, i, count;
+	struct rte_eth_representor_info *info = NULL;
+	size_t size;
+
+	if (type == RTE_ETH_REPRESENTOR_NONE)
+		return 0;
+	if (id == NULL)
+		return -EINVAL;
+
+	/* Get PMD representor range info. */
+	ret = rte_eth_representor_info_get(ethdev->data->port_id, NULL);
+	if (ret < 0) {
+		/* Fallback to direct mapping for compatibility. */
+		*id = representor_port;
+	}
+	n = ret;
+	size = sizeof(*info) + n * sizeof(info->ranges[0]);
+	info = calloc(1, size);
+	if (info == NULL)
+		return -ENOMEM;
+	ret = rte_eth_representor_info_get(ethdev->data->port_id, info);
+	if (ret < 0)
+		goto out;
+
+	/* Default controller and pf to caller. */
+	if (controller == -1)
+		controller = info->controller;
+	if (pf == -1)
+		pf = info->pf;
+
+	/* Locate representor ID. */
+	for (i = 0; i < n; ++i) {
+		if (info->ranges[i].type != type)
+			continue;
+		/* PMD hit: ignore controller if -1. */
+		if (info->ranges[i].controller != -1 &&
+		    info->ranges[i].controller != (uint16_t)controller)
+			continue;
+		count = info->ranges[i].id_end - info->ranges[i].id_base + 1;
+		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_PF) {
+			/* PF. */
+			if (pf >= info->ranges[i].pf + count)
+				continue;
+			*id = info->ranges[i].id_base +
+			      (pf - info->ranges[i].pf);
+			goto out;
+		}
+		/* VF or SF. */
+		/* PMD hit: ignore pf if -1. */
+		if (info->ranges[i].pf != -1 &&
+		    info->ranges[i].pf != (uint16_t)pf)
+			continue;
+		if (info->ranges[i].type == RTE_ETH_REPRESENTOR_VF) {
+			/* VF. */
+			if (representor_port >= info->ranges[i].vf + count)
+				continue;
+			*id = info->ranges[i].id_base +
+			      (representor_port - info->ranges[i].vf);
+			goto out;
+		} else if (info->ranges[i].type == RTE_ETH_REPRESENTOR_SF) {
+			/* SF. */
+			if (representor_port >= info->ranges[i].sf + count)
+				continue;
+			*id = info->ranges[i].id_base +
+			      (representor_port - info->ranges[i].sf);
+			goto out;
+		}
+	}
+	/* Not matching representor ID range. */
+	ret = -ENOENT;
+
+out:
+	if (info != NULL)
+		free(info);
+	return ret;
+}
+
 static int
 eth_dev_handle_port_list(const char *cmd __rte_unused,
 		const char *params __rte_unused,
diff --git a/lib/librte_ethdev/version.map b/lib/librte_ethdev/version.map
index bb6f7436c2..2891f5734e 100644
--- a/lib/librte_ethdev/version.map
+++ b/lib/librte_ethdev/version.map
@@ -268,6 +268,7 @@  INTERNAL {
 	rte_eth_hairpin_queue_peer_bind;
 	rte_eth_hairpin_queue_peer_unbind;
 	rte_eth_hairpin_queue_peer_update;
+	rte_eth_representor_id_convert;
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
 };