@@ -30,7 +30,7 @@
extern struct rte_pci_bus rte_pci_bus;
-static int
+int
pci_get_kernel_driver_by_path(const char *filename, char *dri_name,
size_t len)
{
@@ -70,7 +70,7 @@ rte_pci_map_device(struct rte_pci_device *dev)
switch (dev->kdrv) {
case RTE_PCI_KDRV_VFIO:
#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
+ if (pci_vfio_is_enabled(dev))
ret = pci_vfio_map_resource(dev);
#endif
break;
@@ -99,7 +99,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
switch (dev->kdrv) {
case RTE_PCI_KDRV_VFIO:
#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
+ if (pci_vfio_is_enabled(dev))
pci_vfio_unmap_resource(dev);
#endif
break;
@@ -347,6 +347,15 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
int ret;
TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
+ /*
+ * Insert physical PCI devices before all mediated
+ * PCI devices.
+ */
+ if (dev2->is_mdev) {
+ rte_pci_insert_device(dev2, dev);
+ return 0;
+ }
+
ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr);
if (ret > 0)
continue;
@@ -465,8 +474,14 @@ rte_pci_scan(void)
return 0;
#ifdef VFIO_PRESENT
- if (!pci_vfio_is_enabled())
- RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n");
+ if (!rte_vfio_is_enabled("vfio_pci"))
+ RTE_LOG(DEBUG, EAL, "VFIO PCI module not loaded\n");
+
+ if (!rte_vfio_is_enabled("vfio_mdev"))
+ RTE_LOG(DEBUG, EAL, "VFIO MDEV module not loaded\n");
+
+ if (pci_scan_mdev() != 0)
+ return -1;
#endif
dir = opendir(rte_pci_get_sysfs_path());
@@ -737,7 +752,7 @@ rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
switch (dev->kdrv) {
#ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
- if (pci_vfio_is_enabled())
+ if (pci_vfio_is_enabled(dev))
ret = pci_vfio_ioport_map(dev, bar, p);
break;
#endif
@@ -801,8 +816,7 @@ rte_pci_ioport_unmap(struct rte_pci_ioport *p)
switch (p->dev->kdrv) {
#ifdef VFIO_PRESENT
case RTE_PCI_KDRV_VFIO:
- if (pci_vfio_is_enabled())
- ret = pci_vfio_ioport_unmap(p);
+ ret = -1;
break;
#endif
case RTE_PCI_KDRV_IGB_UIO:
@@ -19,6 +19,9 @@
extern void *pci_map_addr;
void *pci_find_max_end_va(void);
+int pci_get_kernel_driver_by_path(const char *filename, char *dri_name,
+ size_t len);
+
/* parse one line of the "resource" sysfs file (note that the 'line'
* string is modified)
*/
@@ -93,7 +96,17 @@ int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
int pci_vfio_map_resource(struct rte_pci_device *dev);
int pci_vfio_unmap_resource(struct rte_pci_device *dev);
-int pci_vfio_is_enabled(void);
+int pci_vfio_is_enabled(struct rte_pci_device *dev);
+
+int pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
+ struct vfio_device_info *device_info);
+
+int pci_vfio_get_pci_id(struct rte_pci_device *dev, int vfio_dev_fd,
+ struct rte_pci_id *pci_id);
+
+const char *pci_mdev_get_sysfs_path(void);
+
+int pci_scan_mdev(void);
#endif
@@ -21,6 +21,7 @@
#include <rte_bus.h>
#include <rte_spinlock.h>
#include <rte_tailq.h>
+#include <rte_uuid.h>
#include "eal_filesystem.h"
@@ -741,7 +742,7 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
return ret;
}
-static int
+int
pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
struct vfio_device_info *device_info)
{
@@ -776,6 +777,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
struct vfio_region_info *reg = NULL;
char pci_addr[PATH_MAX] = {0};
+ const char *sysfs_base;
int vfio_dev_fd;
struct rte_pci_addr *loc = &dev->addr;
int i, ret;
@@ -791,11 +793,17 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
#endif
/* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ if (dev->is_mdev) {
+ sysfs_base = pci_mdev_get_sysfs_path();
+ rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr));
+ } else {
+ sysfs_base = rte_pci_get_sysfs_path();
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+ }
- ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
- &vfio_dev_fd, &device_info);
+ ret = rte_vfio_setup_device(sysfs_base, pci_addr, &vfio_dev_fd,
+ &device_info);
if (ret)
return ret;
@@ -806,7 +814,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
"Cannot store VFIO mmap details\n");
goto err_vfio_dev_fd;
}
- memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
+
+ vfio_res->is_mdev = dev->is_mdev;
+ if (dev->is_mdev)
+ memcpy(&vfio_res->uuid, &dev->uuid, sizeof(vfio_res->uuid));
+ else
+ memcpy(&vfio_res->pci_addr, &dev->addr,
+ sizeof(vfio_res->pci_addr));
/* get number of registers (up to BAR5) */
vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
@@ -938,6 +952,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
{
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char pci_addr[PATH_MAX] = {0};
+ const char *sysfs_base;
int vfio_dev_fd;
struct rte_pci_addr *loc = &dev->addr;
int i, ret;
@@ -953,15 +968,29 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
#endif
/* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ if (dev->is_mdev) {
+ sysfs_base = pci_mdev_get_sysfs_path();
+ rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr));
+ } else {
+ sysfs_base = rte_pci_get_sysfs_path();
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+ }
/* if we're in a secondary process, just find our tailq entry */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (rte_pci_addr_cmp(&vfio_res->pci_addr,
- &dev->addr))
+ if (dev->is_mdev != vfio_res->is_mdev)
continue;
- break;
+
+ if (!dev->is_mdev && !rte_pci_addr_cmp(&vfio_res->pci_addr,
+ &dev->addr))
+ break;
+
+ if (dev->is_mdev && !rte_uuid_compare(vfio_res->uuid,
+ dev->uuid))
+ break;
+
+ continue;
}
/* if we haven't found our tailq entry, something's wrong */
if (vfio_res == NULL) {
@@ -970,8 +999,8 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
return -1;
}
- ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
- &vfio_dev_fd, &device_info);
+ ret = rte_vfio_setup_device(sysfs_base, pci_addr, &vfio_dev_fd,
+ &device_info);
if (ret)
return ret;
@@ -1030,9 +1059,18 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
/* Get vfio_res */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
+ if (dev->is_mdev != vfio_res->is_mdev)
continue;
- break;
+
+ if (!dev->is_mdev && !rte_pci_addr_cmp(&vfio_res->pci_addr,
+ &dev->addr))
+ break;
+
+ if (dev->is_mdev && !rte_uuid_compare(vfio_res->uuid,
+ dev->uuid))
+ break;
+
+ continue;
}
if (vfio_res == NULL)
@@ -1061,6 +1099,7 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
static int
pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
{
+ const char *sysfs_base;
char pci_addr[PATH_MAX] = {0};
struct rte_pci_addr *loc = &dev->addr;
struct mapped_pci_resource *vfio_res = NULL;
@@ -1068,8 +1107,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
int ret;
/* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ if (dev->is_mdev) {
+ sysfs_base = pci_mdev_get_sysfs_path();
+ rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr));
+ } else {
+ sysfs_base = rte_pci_get_sysfs_path();
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+ }
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
ret = pci_vfio_disable_notifier(dev);
@@ -1091,8 +1136,8 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
return -1;
}
- ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
- dev->intr_handle.vfio_dev_fd);
+ ret = rte_vfio_release_device(sysfs_base, pci_addr,
+ dev->intr_handle.vfio_dev_fd);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Cannot release VFIO device\n");
return ret;
@@ -1117,6 +1162,7 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
static int
pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
{
+ const char *sysfs_base;
char pci_addr[PATH_MAX] = {0};
struct rte_pci_addr *loc = &dev->addr;
struct mapped_pci_resource *vfio_res = NULL;
@@ -1124,11 +1170,17 @@ pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
int ret;
/* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ if (dev->is_mdev) {
+ sysfs_base = pci_mdev_get_sysfs_path();
+ rte_uuid_unparse(dev->uuid, pci_addr, sizeof(pci_addr));
+ } else {
+ sysfs_base = rte_pci_get_sysfs_path();
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+ }
- ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
- dev->intr_handle.vfio_dev_fd);
+ ret = rte_vfio_release_device(sysfs_base, pci_addr,
+ dev->intr_handle.vfio_dev_fd);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Cannot release VFIO device\n");
return ret;
@@ -1249,8 +1301,61 @@ pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar,
}
int
-pci_vfio_is_enabled(void)
+pci_vfio_is_enabled(struct rte_pci_device *dev)
{
- return rte_vfio_is_enabled("vfio_pci");
+	/*
+	 * Mediated devices are checked against the vfio_mdev module,
+	 * physical devices against vfio_pci.
+	 */
+	if (dev->is_mdev)
+		return rte_vfio_is_enabled("vfio_mdev");
+
+	return rte_vfio_is_enabled("vfio_pci");
}
+
+/*
+ * Read the PCI identification fields of a VFIO device from its config space.
+ *
+ * The offset of the config region is looked up with pci_vfio_get_region();
+ * the vendor, device, subsystem vendor, subsystem device and class IDs are
+ * then read through vfio_dev_fd and stored in pci_id.
+ *
+ * Returns 0 on success and -1 when the region lookup or any read fails, in
+ * which case pci_id may have been partially written.
+ */
+int
+pci_vfio_get_pci_id(struct rte_pci_device *dev, int vfio_dev_fd,
+		    struct rte_pci_id *pci_id)
+{
+	uint64_t size, offset;
+	uint32_t class;
+
+	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				&size, &offset) != 0) {
+		RTE_LOG(DEBUG, EAL, "Cannot get offset of CONFIG region.\n");
+		return -1;
+	}
+
+	/* vendor_id */
+	if (pread64(vfio_dev_fd, &pci_id->vendor_id, sizeof(uint16_t),
+		    offset + PCI_VENDOR_ID) != sizeof(uint16_t)) {
+		RTE_LOG(DEBUG, EAL, "Cannot read VendorID from PCI config space\n");
+		return -1;
+	}
+
+	/* device_id */
+	if (pread64(vfio_dev_fd, &pci_id->device_id, sizeof(uint16_t),
+		    offset + PCI_DEVICE_ID) != sizeof(uint16_t)) {
+		RTE_LOG(DEBUG, EAL, "Cannot read DeviceID from PCI config space\n");
+		return -1;
+	}
+
+	/* subsystem_vendor_id */
+	if (pread64(vfio_dev_fd, &pci_id->subsystem_vendor_id, sizeof(uint16_t),
+		    offset + PCI_SUBSYSTEM_VENDOR_ID) != sizeof(uint16_t)) {
+		RTE_LOG(DEBUG, EAL, "Cannot read SubVendorID from PCI config space\n");
+		return -1;
+	}
+
+	/* subsystem_device_id */
+	if (pread64(vfio_dev_fd, &pci_id->subsystem_device_id, sizeof(uint16_t),
+		    offset + PCI_SUBSYSTEM_ID) != sizeof(uint16_t)) {
+		RTE_LOG(DEBUG, EAL, "Cannot read SubDeviceID from PCI config space\n");
+		return -1;
+	}
+
+	/*
+	 * class_id: the dword is read into an unsigned variable so that
+	 * shifting out its low byte below cannot sign-extend a value whose
+	 * top bit is set.
+	 */
+	if (pread64(vfio_dev_fd, &class, sizeof(uint32_t),
+		    offset + PCI_CLASS_REVISION) != sizeof(uint32_t)) {
+		RTE_LOG(DEBUG, EAL, "Cannot read ClassID from PCI config space\n");
+		return -1;
+	}
+	pci_id->class_id = class >> 8;
+
+	return 0;
+}
+
+
#endif
new file mode 100644
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <string.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/pci_regs.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+#include <rte_vfio.h>
+#include <rte_uuid.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+
+#include "private.h"
+#include "pci_init.h"
+
+#ifdef VFIO_PRESENT
+
+extern struct rte_pci_bus rte_pci_bus;
+
+#define SYSFS_MDEV_DEVICES "/sys/bus/mdev/devices"
+
+/*
+ * Return the sysfs directory that lists mediated devices. The
+ * SYSFS_MDEV_DEVICES environment variable, when set, takes precedence
+ * over the built-in default path.
+ */
+const char *pci_mdev_get_sysfs_path(void)
+{
+	const char *env_path = getenv("SYSFS_MDEV_DEVICES");
+
+	return env_path != NULL ? env_path : SYSFS_MDEV_DEVICES;
+}
+
+/*
+ * Tell whether the mediated device under dirname reports "vfio-pci" in its
+ * mdev_type/device_api sysfs attribute.
+ *
+ * Returns 1 when it does, 0 when it reports anything else, and -1 when
+ * rte_eal_parse_sysfs_str() fails on the attribute.
+ */
+static int
+is_pci_device(const char *dirname)
+{
+	char attr_path[PATH_MAX];
+	char api[PATH_MAX];
+	char *newline;
+
+	snprintf(attr_path, sizeof(attr_path), "%s/mdev_type/device_api",
+		 dirname);
+
+	if (rte_eal_parse_sysfs_str(attr_path, api, sizeof(api)) < 0)
+		return -1;
+
+	/* Cut the value at the first newline, if any, before comparing. */
+	newline = strchr(api, '\n');
+	if (newline != NULL)
+		*newline = '\0';
+
+	if (strcmp(api, "vfio-pci") != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Scan one mediated device and register it on the PCI bus device list.
+ *
+ * @param dirname
+ *   sysfs directory of the mediated device
+ * @param addr
+ *   UUID of the mediated device
+ * @return
+ *   0 when the device was added to the list or was already on it,
+ *   1 when the device was skipped (each reason is logged at DEBUG level),
+ *   -1 when the device structure could not be allocated.
+ */
+static int
+pci_scan_one_mdev(const char *dirname, const rte_uuid_t addr)
+{
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	char name[RTE_UUID_STRLEN];
+	char filename[PATH_MAX];
+	char path[PATH_MAX];
+	char driver[PATH_MAX];
+	char *ptr;
+	struct rte_pci_device_internal *pdev;
+	struct rte_pci_device *dev;
+	struct rte_pci_device *dev2;
+	bool need_release = false;
+	const char *sysfs_base;
+	unsigned long tmp;
+	int vfio_dev_fd;
+	int ret;
+
+	sysfs_base = pci_mdev_get_sysfs_path();
+
+	pdev = calloc(1, sizeof(*pdev));
+	if (pdev == NULL)
+		return -1;
+
+	dev = &pdev->device;
+	dev->device.bus = &rte_pci_bus.bus;
+	rte_uuid_unparse(addr, name, sizeof(name));
+
+	/* parse driver */
+	snprintf(filename, sizeof(filename), "%s/driver", dirname);
+	ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver));
+	if (ret < 0) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to get kernel driver\n", name);
+		goto err;
+	}
+
+	/* Only devices bound to the vfio_mdev kernel driver are handled. */
+	if (ret != 0 || strcmp(driver, "vfio_mdev") != 0) {
+		RTE_LOG(DEBUG, EAL, "%s: unsupported mdev driver\n", name);
+		goto err;
+	}
+
+	dev->kdrv = RTE_PCI_KDRV_VFIO;
+
+	dev->is_mdev = 1;
+	rte_uuid_copy(dev->uuid, addr);
+
+	snprintf(filename, sizeof(filename), "%s/%s", sysfs_base, name);
+
+	/* Get the path of the parent device. */
+	if (realpath(filename, path) == NULL) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to get parent device\n", name);
+		goto err;
+	}
+
+	/* Drop the last path component to obtain the parent directory. */
+	ptr = strrchr(path, '/');
+	if (ptr == NULL) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to parse parent device\n",
+			name);
+		goto err;
+	}
+	*ptr = '\0';
+
+	/* get numa node, default to 0 if not present */
+	snprintf(filename, sizeof(filename), "%s/numa_node", path);
+
+	if (access(filename, F_OK) != -1) {
+		if (eal_parse_sysfs_value(filename, &tmp) == 0)
+			dev->device.numa_node = tmp;
+		else
+			dev->device.numa_node = -1;
+	} else {
+		dev->device.numa_node = 0;
+	}
+
+	pci_name_set(dev);
+
+	if (rte_vfio_setup_device(sysfs_base, name, &vfio_dev_fd,
+				  &device_info) != 0) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to setup device\n", name);
+		goto err;
+	}
+
+	/* From here on the error path must release the VFIO device. */
+	need_release = true;
+
+	if (pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info) != 0) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to get regions\n", name);
+		goto err;
+	}
+
+	if (pci_vfio_get_pci_id(dev, vfio_dev_fd, &dev->id) != 0) {
+		RTE_LOG(DEBUG, EAL, "%s: failed to access the device\n", name);
+		goto err;
+	}
+
+	/*
+	 * NOTE(review): vfio_dev_fd is not released on any of the successful
+	 * exits below - confirm the device is meant to stay set up after the
+	 * scan, in particular when the freshly scanned copy is discarded as a
+	 * duplicate.
+	 */
+
+	/*
+	 * Device is valid, add to the list (sorted). An empty list needs no
+	 * special case: the loop body never runs and the device is appended.
+	 */
+	TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
+		/*
+		 * Insert mediated PCI devices after all physical
+		 * PCI devices.
+		 */
+		if (!dev2->is_mdev)
+			continue;
+
+		ret = rte_uuid_compare(dev->uuid, dev2->uuid);
+		if (ret > 0)
+			continue;
+
+		if (ret < 0) {
+			rte_pci_insert_device(dev2, dev);
+			return 0;
+		}
+
+		/* Same UUID: the device is already registered. */
+		if (!rte_dev_is_probed(&dev2->device)) {
+			dev2->kdrv = dev->kdrv;
+			dev2->max_vfs = dev->max_vfs;
+			pci_name_set(dev2);
+			memmove(dev2->mem_resource, dev->mem_resource,
+				sizeof(dev->mem_resource));
+		} else if (dev2->kdrv != dev->kdrv ||
+			   dev2->max_vfs != dev->max_vfs ||
+			   memcmp(&dev2->id, &dev->id, sizeof(dev2->id))) {
+			/*
+			 * The device is plugged and its driver is probed
+			 * already (this happens when rte_dev_probe rescans
+			 * the bus), yet the scan result differs. This should
+			 * not happen, but it is still possible if a device is
+			 * unbound from vfio or uio before hotplug remove and
+			 * rebound with a different configuration, so just
+			 * print the error as an alarm.
+			 *
+			 * The mdev directory is logged: 'filename' holds the
+			 * parent's numa_node path at this point.
+			 */
+			RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n",
+				dirname);
+		} else if (dev2->device.devargs != dev->device.devargs) {
+			rte_devargs_remove(dev2->device.devargs);
+			pci_name_set(dev2);
+		}
+
+		/* The existing entry stays on the list; drop the new copy. */
+		free(pdev);
+		return 0;
+	}
+
+	rte_pci_add_device(dev);
+	return 0;
+
+err:
+	if (need_release)
+		rte_vfio_release_device(sysfs_base, name, vfio_dev_fd);
+	free(pdev);
+	return 1;
+}
+
+/*
+ * Walk the mediated-device sysfs directory and scan every entry that is
+ * named by a UUID, is not ignored by rte_mdev_ignore_device() and is
+ * reported as a PCI device by is_pci_device().
+ *
+ * Returns 0 on success, including when the directory cannot be opened, and
+ * -1 when pci_scan_one_mdev() returns a negative value.
+ */
+int
+pci_scan_mdev(void)
+{
+	const char *sysfs_base = pci_mdev_get_sysfs_path();
+	char dirname[PATH_MAX];
+	struct dirent *e;
+	rte_uuid_t addr;
+	int ret = 0;
+	DIR *dir;
+
+	dir = opendir(sysfs_base);
+	if (dir == NULL) {
+		RTE_LOG(DEBUG, EAL, "%s(): opendir failed: %s\n",
+			__func__, strerror(errno));
+		return 0;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		if (e->d_name[0] == '.')
+			continue;
+
+		if (rte_uuid_parse(e->d_name, addr) != 0)
+			continue;
+
+		if (rte_mdev_ignore_device(addr))
+			continue;
+
+		snprintf(dirname, sizeof(dirname), "%s/%s", sysfs_base,
+			 e->d_name);
+
+		/*
+		 * is_pci_device() returns -1 when the device API cannot be
+		 * read; such entries are skipped together with the non-PCI
+		 * ones instead of being scanned as PCI devices.
+		 */
+		if (is_pci_device(dirname) <= 0)
+			continue;
+
+		if (pci_scan_one_mdev(dirname, addr) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+#endif /* VFIO_PRESENT */
@@ -11,6 +11,7 @@ if is_linux
'linux/pci.c',
'linux/pci_uio.c',
'linux/pci_vfio.c',
+ 'linux/pci_vfio_mdev.c',
)
includes += include_directories('linux')
endif
@@ -24,6 +24,7 @@
#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_vfio.h>
+#include <rte_uuid.h>
#include "private.h"
@@ -57,15 +58,34 @@ pci_devargs_lookup(const struct rte_pci_addr *pci_addr)
return NULL;
}
+/*
+ * Find the "pci" bus devargs entry whose name is the given mediated-device
+ * UUID. Returns the matching entry, or NULL when there is none.
+ */
+static struct rte_devargs *
+mdev_devargs_lookup(const rte_uuid_t mdev_addr)
+{
+	struct rte_devargs *devargs;
+	rte_uuid_t id;
+
+	RTE_EAL_DEVARGS_FOREACH("pci", devargs) {
+		/*
+		 * Entries whose name does not parse as a UUID are skipped so
+		 * that 'id' is never compared while partly or wholly
+		 * uninitialized.
+		 */
+		if (rte_uuid_parse(devargs->name, id) != 0)
+			continue;
+		if (rte_uuid_compare(mdev_addr, id) == 0)
+			return devargs;
+	}
+	return NULL;
+}
+
void
pci_name_set(struct rte_pci_device *dev)
{
struct rte_devargs *devargs;
/* Each device has its internal, canonical name set. */
- rte_pci_device_name(&dev->addr,
- dev->name, sizeof(dev->name));
- devargs = pci_devargs_lookup(&dev->addr);
+ if (dev->is_mdev) {
+ rte_uuid_unparse(dev->uuid, dev->name, sizeof(dev->name));
+ devargs = mdev_devargs_lookup(dev->uuid);
+ } else {
+ rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name));
+ devargs = pci_devargs_lookup(&dev->addr);
+ }
+
dev->device.devargs = devargs;
/* When using a blocklist, only blocked devices will have
@@ -166,21 +186,17 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
{
int ret;
bool already_probed;
- struct rte_pci_addr *loc;
if ((dr == NULL) || (dev == NULL))
return -EINVAL;
- loc = &dev->addr;
-
/* The device is not blocked; Check if driver supports it */
if (!rte_pci_match(dr, dev))
/* Match of device and driver failed */
return 1;
- RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid, loc->function,
- dev->device.numa_node);
+ RTE_LOG(DEBUG, EAL, "PCI device %s on NUMA socket %i\n",
+ dev->name, dev->device.numa_node);
/* no initialization when marked as blocked, return without error */
if (dev->device.devargs != NULL &&
@@ -235,10 +251,9 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
}
}
- RTE_LOG(INFO, EAL, "Probe PCI driver: %s (%x:%x) device: "PCI_PRI_FMT" (socket %i)\n",
+ RTE_LOG(INFO, EAL, "Probe PCI driver: %s (%x:%x) device: %s (socket %i)\n",
dr->driver.name, dev->id.vendor_id, dev->id.device_id,
- loc->domain, loc->bus, loc->devid, loc->function,
- dev->device.numa_node);
+ dev->name, dev->device.numa_node);
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (already_probed)
@@ -266,7 +281,6 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
static int
rte_pci_detach_dev(struct rte_pci_device *dev)
{
- struct rte_pci_addr *loc;
struct rte_pci_driver *dr;
int ret = 0;
@@ -274,11 +288,9 @@ rte_pci_detach_dev(struct rte_pci_device *dev)
return -EINVAL;
dr = dev->driver;
- loc = &dev->addr;
- RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid,
- loc->function, dev->device.numa_node);
+ RTE_LOG(DEBUG, EAL, "PCI device %s on NUMA socket %i\n",
+ dev->name, dev->device.numa_node);
RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
@@ -345,10 +357,9 @@ pci_probe(void)
ret = pci_probe_all_drivers(dev);
if (ret < 0) {
if (ret != -EEXIST) {
- RTE_LOG(ERR, EAL, "Requested device "
- PCI_PRI_FMT " cannot be used\n",
- dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
+ RTE_LOG(ERR, EAL,
+ "Requested device %s cannot be used\n",
+ dev->name);
rte_errno = errno;
failed++;
}
@@ -395,11 +406,20 @@ pci_parse(const char *name, void *addr)
{
struct rte_pci_addr *out = addr;
struct rte_pci_addr pci_addr;
+ rte_uuid_t mdev_addr;
bool parse;
parse = (rte_pci_addr_parse(name, &pci_addr) == 0);
if (parse && addr != NULL)
*out = pci_addr;
+
+ if (parse)
+ return 0;
+
+ parse = (rte_uuid_parse(name, mdev_addr) == 0);
+ if (parse && addr != NULL)
+ memcpy(addr, &mdev_addr, sizeof(mdev_addr));
+
return parse == false;
}
@@ -622,11 +642,9 @@ pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
return -1;
}
-bool
-rte_pci_ignore_device(const struct rte_pci_addr *pci_addr)
+static bool
+devargs_ignore_device(struct rte_devargs *devargs)
{
- struct rte_devargs *devargs = pci_devargs_lookup(pci_addr);
-
switch (rte_pci_bus.bus.conf.scan_mode) {
case RTE_BUS_SCAN_ALLOWLIST:
if (devargs && devargs->policy == RTE_DEV_ALLOWED)
@@ -641,6 +659,22 @@ rte_pci_ignore_device(const struct rte_pci_addr *pci_addr)
return true;
}
+/*
+ * Look up the devargs entry for pci_addr and let devargs_ignore_device()
+ * decide whether the device is to be ignored.
+ */
+bool
+rte_pci_ignore_device(const struct rte_pci_addr *pci_addr)
+{
+	return devargs_ignore_device(pci_devargs_lookup(pci_addr));
+}
+
+/*
+ * Look up the devargs entry for the mediated device UUID and let
+ * devargs_ignore_device() decide whether the device is to be ignored.
+ */
+bool
+rte_mdev_ignore_device(const rte_uuid_t mdev_addr)
+{
+	return devargs_ignore_device(mdev_devargs_lookup(mdev_addr));
+}
+
enum rte_iova_mode
rte_pci_get_iommu_class(void)
{
@@ -2,12 +2,15 @@
* Copyright 2018 Gaëtan Rivet
*/
+#include <string.h>
+
#include <rte_bus.h>
#include <rte_bus_pci.h>
#include <rte_dev.h>
#include <rte_errno.h>
#include <rte_kvargs.h>
#include <rte_pci.h>
+#include <rte_uuid.h>
#include "private.h"
@@ -35,6 +38,19 @@ pci_addr_kv_cmp(const char *key __rte_unused,
return -abs(rte_pci_addr_cmp(addr1, addr2));
}
+/*
+ * kvargs handler: parse value as a UUID and compare it with the UUID passed
+ * through _addr2. Returns 0 when rte_uuid_compare() reports no difference,
+ * and a negative number when it reports one or when rte_uuid_parse()
+ * rejects value.
+ */
+static int
+mdev_addr_kv_cmp(const char *key __rte_unused,
+		 const char *value,
+		 void *_addr2)
+{
+	const unsigned char *wanted = _addr2;
+	rte_uuid_t parsed;
+	int diff;
+
+	if (rte_uuid_parse(value, parsed) != 0)
+		return -1;
+
+	diff = rte_uuid_compare(parsed, wanted);
+	return -abs(diff);
+}
+
static int
pci_dev_match(const struct rte_device *dev,
const void *_kvlist)
@@ -47,11 +63,21 @@ pci_dev_match(const struct rte_device *dev,
return 0;
pdev = RTE_DEV_TO_PCI_CONST(dev);
/* if any field does not match. */
- if (rte_kvargs_process(kvlist, pci_params_keys[RTE_PCI_PARAM_ADDR],
- &pci_addr_kv_cmp,
- (void *)(intptr_t)&pdev->addr))
- return 1;
- return 0;
+ if (!pdev->is_mdev) {
+ if (rte_kvargs_process(kvlist,
+ pci_params_keys[RTE_PCI_PARAM_ADDR], &pci_addr_kv_cmp,
+ (void *)(intptr_t)&pdev->addr))
+ return 1;
+ else
+ return 0;
+ } else {
+ if (rte_kvargs_process(kvlist,
+ pci_params_keys[RTE_PCI_PARAM_ADDR], &mdev_addr_kv_cmp,
+ (void *)(intptr_t)&pdev->uuid))
+ return 1;
+ else
+ return 0;
+ }
}
void *
@@ -64,6 +64,18 @@ pci_name_set(struct rte_pci_device *dev);
*/
bool rte_pci_ignore_device(const struct rte_pci_addr *pci_addr);
+/**
+ * Check whether the mediated PCI device with the given UUID should be
+ * ignored during the bus scan.
+ *
+ * @param mdev_addr
+ *   UUID of the mediated device to be checked
+ * @return
+ *   true: if device is to be ignored,
+ *   false: if device is to be scanned.
+ */
+bool rte_mdev_ignore_device(const rte_uuid_t mdev_addr);
+
/**
* Add a PCI device to the PCI Bus (append to PCI Device list). This function
* also updates the bus references of the PCI Device (and the generic device
@@ -114,6 +126,11 @@ struct pci_msix_table {
struct mapped_pci_resource {
TAILQ_ENTRY(mapped_pci_resource) next;
+ union {
+ struct rte_pci_addr addr;
+ rte_uuid_t uuid;
+ };
+ uint8_t is_mdev;
struct rte_pci_addr pci_addr;
char path[PATH_MAX];
int nb_maps;
@@ -51,6 +51,15 @@ TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver);
struct rte_devargs;
+/*
+ * NOTE: we can't include rte_uuid.h directly due to the conflicts
+ * introduced by stdbool.h
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/* It's RTE_UUID_STRLEN, which is bigger than PCI_PRI_STR_SIZE. */
+#define RTE_PCI_NAME_LEN (36 + 1)
+
enum rte_pci_kernel_driver {
RTE_PCI_KDRV_UNKNOWN = 0, /* may be misc UIO or bifurcated driver */
RTE_PCI_KDRV_IGB_UIO, /* igb_uio for Linux */
@@ -67,7 +76,11 @@ enum rte_pci_kernel_driver {
struct rte_pci_device {
TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
struct rte_device device; /**< Inherit core device */
- struct rte_pci_addr addr; /**< PCI location. */
+ union {
+ struct rte_pci_addr addr; /**< PCI location. */
+ rte_uuid_t uuid; /**< Mdev location. */
+ };
+ uint8_t is_mdev; /**< True for mediated PCI device */
struct rte_pci_id id; /**< PCI ID. */
struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
/**< PCI Memory Resource */
@@ -75,7 +88,7 @@ struct rte_pci_device {
struct rte_pci_driver *driver; /**< PCI driver used in probing */
uint16_t max_vfs; /**< sriov enable if not zero */
enum rte_pci_kernel_driver kdrv; /**< Kernel driver passthrough */
- char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */
+ char name[RTE_PCI_NAME_LEN]; /**< PCI/Mdev location (ASCII) */
struct rte_intr_handle vfio_req_intr_handle;
/**< Handler of VFIO request interrupt */
};
@@ -1089,6 +1089,15 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+#ifdef VFIO_PRESENT
+ if (rte_eal_vfio_setup() < 0) {
+ rte_eal_init_alert("Cannot init VFIO");
+ rte_errno = EAGAIN;
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+ return -1;
+ }
+#endif
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices");
rte_errno = ENODEV;
@@ -1194,14 +1203,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
-#ifdef VFIO_PRESENT
- if (rte_eal_vfio_setup() < 0) {
- rte_eal_init_alert("Cannot init VFIO");
- rte_errno = EAGAIN;
- __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
- return -1;
- }
-#endif
/* in secondary processes, memory init may allocate additional fbarrays
* not present in primary processes, so to avoid any potential issues,
* initialize memzones first.