[RFC,v2] eal: add VFIO-PCI SR-IOV support

Message ID 20200410073254.34905-1-haiyue.wang@intel.com (mailing list archive)
State Superseded, archived
Headers
Series [RFC,v2] eal: add VFIO-PCI SR-IOV support |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Wang, Haiyue April 10, 2020, 7:32 a.m. UTC
  The kernel module vfio-pci introduces the VF token to enable SR-IOV
support.

The VF token can be set by a vfio-pci based PF driver and must be known
by the vfio-pci based VF driver in order to gain access to the device.

An example VF token option would take this form:

1. ./usertools/dpdk-devbind.py -b vfio-pci 0000:87:00.0

2. echo 2 > /sys/bus/pci/devices/0000:87:00.0/sriov_numvfs

3. Start the PF:
  ./x86_64-native-linux-gcc/app/testpmd -l 22-25 -n 4 \
         -w 87:00.0,vf_token=2ab74924-c335-45f4-9b16-8569e5b08258 \
         --file-prefix=pf -- -i

4. Start the VF:
   ./x86_64-native-linux-gcc/app/testpmd -l 26-29 -n 4 \
         -w 87:02.0,vf_token=2ab74924-c335-45f4-9b16-8569e5b08258 \
         --file-prefix=vf1 -- -i

Tested against the vfio-pci kernel patch series: https://patchwork.ozlabs.org/cover/1253222/

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
---
Based on RFC v1: https://patchwork.dpdk.org/patch/66281/
---
 drivers/bus/pci/linux/pci_vfio.c  | 56 +++++++++++++++++++++++++++++--
 lib/librte_eal/freebsd/eal.c      |  3 +-
 lib/librte_eal/include/rte_vfio.h |  8 ++++-
 lib/librte_eal/linux/eal_vfio.c   | 20 +++++++++--
 4 files changed, 80 insertions(+), 7 deletions(-)
  

Comments

Vamsi Krishna Attunuru April 10, 2020, 1:02 p.m. UTC | #1
> -----Original Message-----
> From: Haiyue Wang <haiyue.wang@intel.com>
> Sent: Friday, April 10, 2020 1:03 PM
> To: dev@dpdk.org; thomas@monjalon.net; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> alex.williamson@redhat.com; david.marchand@redhat.com
> Cc: Haiyue Wang <haiyue.wang@intel.com>
> Subject: [EXT] [RFC v2] eal: add VFIO-PCI SR-IOV support
> 
> External Email
> 
> ----------------------------------------------------------------------
> The kernel module vfio-pci introduces the VF token to enable SR-IOV
> support.
> 
> The VF token can be set by a vfio-pci based PF driver and must be known by
> the vfio-pci based VF driver in order to gain access to the device.
> 
> An example VF token option would take this form:
> 
> 1. ./usertools/dpdk-devbind.py -b vfio-pci 0000:87:00.0
> 
> 2. echo 2 > /sys/bus/pci/devices/0000:87:00.0/sriov_numvfs
> 
> 3. Start the PF:
>   ./x86_64-native-linux-gcc/app/testpmd -l 22-25 -n 4 \
>          -w 87:00.0,vf_token=2ab74924-c335-45f4-9b16-8569e5b08258 \
>          --file-prefix=pf -- -i
> 
> 4. Start the VF:
>    ./x86_64-native-linux-gcc/app/testpmd -l 26-29 -n 4 \
>          -w 87:02.0,vf_token=2ab74924-c335-45f4-9b16-8569e5b08258 \
>          --file-prefix=vf1 -- -i
> 
> Test based on vfio-pci patch:
> https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__patchwork.ozlabs.org_cover_1253222_&d=DwIDAg&c=nKjWec2b6R0m
> OyPaz7xtfQ&r=WllrYaumVkxaWjgKto6E_rtDQshhIhik2jkvzFyRhW8&m=SFD0o
> TfFgnU88wT2X7qMWRDen3KFV8oSOlqz3WQm3nI&s=CU2jxKUsy4oiI26apob
> DF8BJebDrheBiu3eKlF3e76E&e=
> 
> Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
> ---
> Based on RFC v1: https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__patchwork.dpdk.org_patch_66281_&d=DwIDAg&c=nKjWec2b6R0mOyP
> az7xtfQ&r=WllrYaumVkxaWjgKto6E_rtDQshhIhik2jkvzFyRhW8&m=SFD0oTfFg
> nU88wT2X7qMWRDen3KFV8oSOlqz3WQm3nI&s=3dHnLcXpkRVOZs6wdZQylc
> Je0oNNBs77hXE6SOuLO7o&e=
> ---
>  drivers/bus/pci/linux/pci_vfio.c  | 56 +++++++++++++++++++++++++++++--
>  lib/librte_eal/freebsd/eal.c      |  3 +-
>  lib/librte_eal/include/rte_vfio.h |  8 ++++-
>  lib/librte_eal/linux/eal_vfio.c   | 20 +++++++++--
>  4 files changed, 80 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
> index 64cd84a68..7f99337c7 100644
> --- a/drivers/bus/pci/linux/pci_vfio.c
> +++ b/drivers/bus/pci/linux/pci_vfio.c
> @@ -11,6 +11,7 @@
>  #include <sys/mman.h>
>  #include <stdbool.h>
> 
> +#include <rte_devargs.h>
>  #include <rte_log.h>
>  #include <rte_pci.h>
>  #include <rte_bus_pci.h>
> @@ -644,11 +645,59 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int
> msix_region)
>  	return ret;
>  }
> 
> +static void
> +vfio_pci_vf_token_arg(struct rte_devargs *devargs, rte_uuid_t uu) {
> +#define VF_TOKEN_ARG "vf_token="
> +	char c, *p, *vf_token;
> +
> +	if (devargs == NULL)
> +		return;
> +
> +	p = strstr(devargs->args, VF_TOKEN_ARG);
> +	if (!p)
> +		return;
> +
> +	vf_token = p + strlen(VF_TOKEN_ARG);
> +	if (strlen(vf_token) < (RTE_UUID_STRLEN - 1))
> +		return;
> +
> +	c = vf_token[RTE_UUID_STRLEN - 1];
> +	if (c != '\0' && c != ',')
> +		return;
> +
> +	vf_token[RTE_UUID_STRLEN - 1] = '\0';
> +	if (rte_uuid_parse(vf_token, uu)) {
> +		RTE_LOG(ERR, EAL,
> +			"The VF token is not a valid uuid : %s\n", vf_token);
> +		vf_token[RTE_UUID_STRLEN - 1] = c;
> +		return;
> +	}
> +
> +	RTE_LOG(DEBUG, EAL,
> +		"The VF token is found : %s\n", vf_token);
> +
> +	vf_token[RTE_UUID_STRLEN - 1] = c;
> +
> +	/* Purge this vfio-pci specific token from the device arguments */
> +	if (c != '\0') {
> +		/* 1. Handle the case : 'vf_token=uuid,arg1=val1' */
> +		memmove(p, vf_token + RTE_UUID_STRLEN,
> +			strlen(vf_token + RTE_UUID_STRLEN) + 1);
> +	} else {
> +		/* 2. Handle the case : 'arg1=val1,vf_token=uuid' */
> +		if (p != devargs->args)
> +			p--;
> +
> +		*p = '\0';
> +	}
> +}
> 
>  static int
>  pci_vfio_map_resource_primary(struct rte_pci_device *dev)  {
>  	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
> +	rte_uuid_t vf_token = RTE_UUID_INIT(0, 0, 0, 0, 0ULL);
>  	char pci_addr[PATH_MAX] = {0};
>  	int vfio_dev_fd;
>  	struct rte_pci_addr *loc = &dev->addr; @@ -668,8 +717,9 @@
> pci_vfio_map_resource_primary(struct rte_pci_device *dev)
>  	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
>  			loc->domain, loc->bus, loc->devid, loc->function);
> 
> +	vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
>  	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
> -					&vfio_dev_fd, &device_info);
> +					&vfio_dev_fd, &device_info,
> vf_token);
>  	if (ret)
>  		return ret;
> 
> @@ -797,6 +847,7 @@ static int
>  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)  {
>  	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
> +	rte_uuid_t vf_token = RTE_UUID_INIT(0, 0, 0, 0, 0ULL);
>  	char pci_addr[PATH_MAX] = {0};
>  	int vfio_dev_fd;
>  	struct rte_pci_addr *loc = &dev->addr; @@ -830,8 +881,9 @@
> pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
>  		return -1;
>  	}
> 
> +	vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
>  	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
> -					&vfio_dev_fd, &device_info);
> +					&vfio_dev_fd, &device_info,
> vf_token);
>  	if (ret)
>  		return ret;
> 
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c index
> 6ae37e7e6..cbb4c400e 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -995,7 +995,8 @@ rte_eal_vfio_intr_mode(void)  int
> rte_vfio_setup_device(__rte_unused const char *sysfs_base,
>  		      __rte_unused const char *dev_addr,
>  		      __rte_unused int *vfio_dev_fd,
> -		      __rte_unused struct vfio_device_info *device_info)
> +		      __rte_unused struct vfio_device_info *device_info
> +		      __rte_unused rte_uuid_t vf_token)
>  {
>  	return -1;
>  }
> diff --git a/lib/librte_eal/include/rte_vfio.h b/lib/librte_eal/include/rte_vfio.h
> index 20ed8c45a..1f9e22d82 100644
> --- a/lib/librte_eal/include/rte_vfio.h
> +++ b/lib/librte_eal/include/rte_vfio.h
> @@ -16,6 +16,8 @@ extern "C" {
> 
>  #include <stdint.h>
> 
> +#include <rte_uuid.h>
> +
>  /*
>   * determine if VFIO is present on the system
>   */
> @@ -102,13 +104,17 @@ struct vfio_device_info;
>   * @param device_info
>   *   Device information.
>   *
> + * @param vf_token
> + *   VF token.
> + *
>   * @return
>   *   0 on success.
>   *   <0 on failure.
>   *   >1 if the device cannot be managed this way.
>   */
>  int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
> -		int *vfio_dev_fd, struct vfio_device_info *device_info);
> +		int *vfio_dev_fd, struct vfio_device_info *device_info,
> +		rte_uuid_t vf_token);
> 
>  /**
>   * Release a device mapped to a VFIO-managed I/O MMU group.
> diff --git a/lib/librte_eal/linux/eal_vfio.c b/lib/librte_eal/linux/eal_vfio.c
> index 4502aefed..8c0ad04e5 100644
> --- a/lib/librte_eal/linux/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal_vfio.c
> @@ -702,7 +702,8 @@ rte_vfio_clear_group(int vfio_group_fd)
> 
>  int
>  rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
> -		int *vfio_dev_fd, struct vfio_device_info *device_info)
> +		int *vfio_dev_fd, struct vfio_device_info *device_info,
> +		rte_uuid_t vf_token)
>  {
>  	struct vfio_group_status group_status = {
>  			.argsz = sizeof(group_status)
> @@ -712,6 +713,7 @@ rte_vfio_setup_device(const char *sysfs_base, const
> char *dev_addr,
>  	int vfio_container_fd;
>  	int vfio_group_fd;
>  	int iommu_group_num;
> +	char dev[PATH_MAX];
>  	int i, ret;
> 
>  	/* get group number */
> @@ -895,8 +897,19 @@ rte_vfio_setup_device(const char *sysfs_base,
> const char *dev_addr,
>  				t->type_id, t->name);
>  	}
> 
> +	if (!rte_uuid_is_null(vf_token)) {
> +		char vf_token_str[RTE_UUID_STRLEN];
> +
> +		rte_uuid_unparse(vf_token, vf_token_str,
> sizeof(vf_token_str));
> +		snprintf(dev, sizeof(dev),
> +			 "%s vf_token=%s", dev_addr, vf_token_str);
> +	} else {
> +		snprintf(dev, sizeof(dev),
> +			 "%s", dev_addr);
> +	}
> +
>  	/* get a file descriptor for the device */
> -	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD,
> dev_addr);
> +	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD,
> dev);
>  	if (*vfio_dev_fd < 0) {
>  		/* if we cannot get a device fd, this implies a problem with
>  		 * the VFIO group or the container not having IOMMU
> configured.
> @@ -2081,7 +2094,8 @@ int
>  rte_vfio_setup_device(__rte_unused const char *sysfs_base,
>  		__rte_unused const char *dev_addr,
>  		__rte_unused int *vfio_dev_fd,
> -		__rte_unused struct vfio_device_info *device_info)
> +		__rte_unused struct vfio_device_info *device_info
> +		__rte_unused rte_uuid_t vf_token)
>  {
>  	return -1;
>  }
> --
> 2.26.0

Acked-by: Vamsi Attunuru <vattunuru@marvell.com>
  

Patch

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 64cd84a68..7f99337c7 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -11,6 +11,7 @@ 
 #include <sys/mman.h>
 #include <stdbool.h>
 
+#include <rte_devargs.h>
 #include <rte_log.h>
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
@@ -644,11 +645,59 @@  pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
 	return ret;
 }
 
+static void
+vfio_pci_vf_token_arg(struct rte_devargs *devargs, rte_uuid_t uu)
+{
+#define VF_TOKEN_ARG "vf_token="
+	char c, *p, *vf_token;
+
+	if (devargs == NULL)
+		return;
+
+	p = strstr(devargs->args, VF_TOKEN_ARG);
+	if (!p)
+		return;
+
+	vf_token = p + strlen(VF_TOKEN_ARG);
+	if (strlen(vf_token) < (RTE_UUID_STRLEN - 1))
+		return;
+
+	c = vf_token[RTE_UUID_STRLEN - 1];
+	if (c != '\0' && c != ',')
+		return;
+
+	vf_token[RTE_UUID_STRLEN - 1] = '\0'; /* temporary terminator */
+	if (rte_uuid_parse(vf_token, uu)) {
+		RTE_LOG(ERR, EAL,
+			"The VF token is not a valid uuid : %s\n", vf_token);
+		vf_token[RTE_UUID_STRLEN - 1] = c;
+		return;
+	}
+
+	RTE_LOG(DEBUG, EAL,
+		"The VF token is found : %s\n", vf_token);
+
+	vf_token[RTE_UUID_STRLEN - 1] = c; /* undo the temporary terminator */
+
+	/* Remove the parsed "vf_token=..." text from devargs->args in place */
+	if (c != '\0') {
+		/* 1. ',' follows ('vf_token=uuid,arg1=val1'): shift the tail */
+		memmove(p, vf_token + RTE_UUID_STRLEN,
+			strlen(vf_token + RTE_UUID_STRLEN) + 1);
+	} else {
+		/* 2. Token ends the string ('arg1=val1,vf_token=uuid'): cut */
+		if (p != devargs->args)
+			p--;
+
+		*p = '\0';
+	}
+}
 
 static int
 pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 {
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	rte_uuid_t vf_token = RTE_UUID_INIT(0, 0, 0, 0, 0ULL);
 	char pci_addr[PATH_MAX] = {0};
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
@@ -668,8 +717,9 @@  pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
 			loc->domain, loc->bus, loc->devid, loc->function);
 
+	vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
-					&vfio_dev_fd, &device_info);
+					&vfio_dev_fd, &device_info, vf_token);
 	if (ret)
 		return ret;
 
@@ -797,6 +847,7 @@  static int
 pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 {
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	rte_uuid_t vf_token = RTE_UUID_INIT(0, 0, 0, 0, 0ULL);
 	char pci_addr[PATH_MAX] = {0};
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
@@ -830,8 +881,9 @@  pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 		return -1;
 	}
 
+	vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
-					&vfio_dev_fd, &device_info);
+					&vfio_dev_fd, &device_info, vf_token);
 	if (ret)
 		return ret;
 
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 6ae37e7e6..cbb4c400e 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -995,7 +995,8 @@  rte_eal_vfio_intr_mode(void)
 int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
 		      __rte_unused const char *dev_addr,
 		      __rte_unused int *vfio_dev_fd,
-		      __rte_unused struct vfio_device_info *device_info)
+		      __rte_unused struct vfio_device_info *device_info,
+		      __rte_unused rte_uuid_t vf_token)
 {
 	return -1;
 }
diff --git a/lib/librte_eal/include/rte_vfio.h b/lib/librte_eal/include/rte_vfio.h
index 20ed8c45a..1f9e22d82 100644
--- a/lib/librte_eal/include/rte_vfio.h
+++ b/lib/librte_eal/include/rte_vfio.h
@@ -16,6 +16,8 @@  extern "C" {
 
 #include <stdint.h>
 
+#include <rte_uuid.h>
+
 /*
  * determine if VFIO is present on the system
  */
@@ -102,13 +104,17 @@  struct vfio_device_info;
  * @param device_info
  *   Device information.
  *
+ * @param vf_token
+ *   VF token.
+ *
  * @return
  *   0 on success.
  *   <0 on failure.
  *   >1 if the device cannot be managed this way.
  */
 int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
-		int *vfio_dev_fd, struct vfio_device_info *device_info);
+		int *vfio_dev_fd, struct vfio_device_info *device_info,
+		rte_uuid_t vf_token);
 
 /**
  * Release a device mapped to a VFIO-managed I/O MMU group.
diff --git a/lib/librte_eal/linux/eal_vfio.c b/lib/librte_eal/linux/eal_vfio.c
index 4502aefed..8c0ad04e5 100644
--- a/lib/librte_eal/linux/eal_vfio.c
+++ b/lib/librte_eal/linux/eal_vfio.c
@@ -702,7 +702,8 @@  rte_vfio_clear_group(int vfio_group_fd)
 
 int
 rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
-		int *vfio_dev_fd, struct vfio_device_info *device_info)
+		int *vfio_dev_fd, struct vfio_device_info *device_info,
+		rte_uuid_t vf_token)
 {
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
@@ -712,6 +713,7 @@  rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 	int vfio_container_fd;
 	int vfio_group_fd;
 	int iommu_group_num;
+	char dev[PATH_MAX];
 	int i, ret;
 
 	/* get group number */
@@ -895,8 +897,19 @@  rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				t->type_id, t->name);
 	}
 
+	if (!rte_uuid_is_null(vf_token)) {
+		char vf_token_str[RTE_UUID_STRLEN];
+
+		rte_uuid_unparse(vf_token, vf_token_str, sizeof(vf_token_str));
+		snprintf(dev, sizeof(dev),
+			 "%s vf_token=%s", dev_addr, vf_token_str);
+	} else {
+		snprintf(dev, sizeof(dev),
+			 "%s", dev_addr);
+	}
+
 	/* get a file descriptor for the device */
-	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev);
 	if (*vfio_dev_fd < 0) {
 		/* if we cannot get a device fd, this implies a problem with
 		 * the VFIO group or the container not having IOMMU configured.
@@ -2081,7 +2094,8 @@  int
 rte_vfio_setup_device(__rte_unused const char *sysfs_base,
 		__rte_unused const char *dev_addr,
 		__rte_unused int *vfio_dev_fd,
-		__rte_unused struct vfio_device_info *device_info)
+		__rte_unused struct vfio_device_info *device_info,
+		__rte_unused rte_uuid_t vf_token)
 {
 	return -1;
 }