@@ -654,6 +654,82 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * Map every sparse-mmap area of one BAR into the process address space.
+ *
+ * For each entry of vfio_areas an inaccessible anonymous mapping is created
+ * first to reserve address space, then the device region is mapped on top of
+ * it through pci_map_resource() with RTE_MAP_FORCE_ADDRESS. The resulting
+ * address, together with the area's offset and size, is recorded in
+ * map->areas[].
+ *
+ * vfio_dev_fd       device fd handed to pci_map_resource()
+ * vfio_res          resource descriptor owning maps[bar_index]
+ * vfio_areas        nr_areas descriptors (offset/size relative to the BAR)
+ * nr_areas          number of entries in vfio_areas
+ * bar_index         index into vfio_res->maps[]
+ * additional_flags  extra mmap() flags for the reservation; the callers in
+ *                   this file pass 0 or MAP_FIXED
+ * numa_node         socket used when map->areas has to be allocated
+ *
+ * Returns 0 on success or when the BAR is skipped (size 0 or no areas),
+ * -1 on failure. On failure every area mapped by this call is unmapped,
+ * map->areas is freed and reset to NULL, and map->nr_areas is set to 0.
+ */
+static int
+pci_vfio_sparse_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
+		struct vfio_region_sparse_mmap_area *vfio_areas,
+		uint32_t nr_areas, int bar_index, int additional_flags,
+		int numa_node)
+{
+	struct pci_map *map = &vfio_res->maps[bar_index];
+	struct rte_mem_map_area *area;
+	struct vfio_region_sparse_mmap_area *sparse;
+	void *bar_addr;
+	void *hint;
+	uint32_t i, j;
+
+	if (map->size == 0) {
+		RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index);
+		return 0;
+	}
+
+	/*
+	 * Record the area count only for BARs that are actually handled, so
+	 * a skipped BAR never ends up with a non-zero count and no table.
+	 */
+	map->nr_areas = nr_areas;
+
+	if (!map->nr_areas) {
+		RTE_LOG(DEBUG, EAL, "Skip bar %d with no sparse mmap areas\n",
+			bar_index);
+		map->areas = NULL;
+		return 0;
+	}
+
+	if (map->areas == NULL) {
+		map->areas = rte_zmalloc_socket(NULL,
+				sizeof(*map->areas) * nr_areas,
+				RTE_CACHE_LINE_SIZE, numa_node);
+		if (map->areas == NULL) {
+			RTE_LOG(ERR, EAL,
+				"Cannot alloc memory for sparse map areas\n");
+			/* no table: do not leave a count that claims entries */
+			map->nr_areas = 0;
+			return -1;
+		}
+	}
+
+	for (i = 0; i < map->nr_areas; i++) {
+		area = &map->areas[i];
+		sparse = &vfio_areas[i];
+
+		/*
+		 * Each area needs its own address. When the table entry
+		 * already holds one (pre-existing table), reuse it; otherwise
+		 * fall back to the BAR hint. Using map->addr for every area
+		 * would, with MAP_FIXED, stack all areas on the same address.
+		 * NOTE(review): assumes a pre-existing table was filled in by
+		 * the process that first mapped the BAR - confirm.
+		 */
+		hint = area->addr != NULL ? area->addr : map->addr;
+
+		/* reserve the address range using an inaccessible mapping */
+		bar_addr = mmap(hint, sparse->size, 0, MAP_PRIVATE |
+				MAP_ANONYMOUS | additional_flags, -1, 0);
+		if (bar_addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "Failed to create inaccessible mapping for BAR%d\n",
+				bar_index);
+			goto err_map;
+		}
+
+		area->addr = pci_map_resource(bar_addr, vfio_dev_fd,
+				map->offset + sparse->offset, sparse->size,
+				RTE_MAP_FORCE_ADDRESS);
+		if (area->addr == NULL) {
+			munmap(bar_addr, sparse->size);
+			RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",
+				bar_index);
+			goto err_map;
+		}
+
+		area->offset = sparse->offset;
+		area->size = sparse->size;
+	}
+
+	return 0;
+
+err_map:
+	/* undo the areas mapped so far; entry i itself was never mapped */
+	for (j = 0; j < i; j++)
+		pci_unmap_resource(map->areas[j].addr, map->areas[j].size);
+
+	rte_free(map->areas);
+	/* callers run further cleanup on this map: leave no dangling pointer */
+	map->areas = NULL;
+	map->nr_areas = 0;
+	return -1;
+}
+
/*
* region info may contain capability headers, so we need to keep reallocating
* the memory until we match allocated memory size with argsz.
@@ -770,6 +846,31 @@ pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
return 0;
}
+/*
+ * Unmap every BAR mapping recorded in vfio_res and release the per-BAR
+ * area tables.
+ *
+ * A BAR that carries an area table is unmapped area by area and its table
+ * is freed; any other BAR with a recorded address is unmapped as a whole.
+ * vfio_res itself is not freed here.
+ *
+ * NOTE(review): the area table is freed unconditionally; if this is reached
+ * from a process that does not own the table, confirm that is intended.
+ */
+static void
+clean_up_pci_resource(struct mapped_pci_resource *vfio_res)
+{
+	struct pci_map *map;
+	uint32_t i, j;
+
+	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
+		map = &vfio_res->maps[i];
+
+		if (map->areas != NULL && map->nr_areas > 0) {
+			for (j = 0; j < map->nr_areas; j++)
+				pci_unmap_resource(map->areas[j].addr,
+						map->areas[j].size);
+
+			/*
+			 * Free the table of this BAR, inside the loop: freeing
+			 * after the loop would only release the last BAR's
+			 * table and leak all the others.
+			 */
+			rte_free(map->areas);
+			map->areas = NULL;
+			map->nr_areas = 0;
+		} else if (map->addr) {
+			/*
+			 * We do not need to be aware of MSI-X BAR mappings.
+			 * Using current maps array is enough.
+			 */
+			pci_unmap_resource(map->addr, map->size);
+		}
+	}
+}
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -866,6 +967,8 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
for (i = 0; i < vfio_res->nb_maps; i++) {
void *bar_addr;
+ struct vfio_info_cap_header *hdr;
+ struct vfio_region_info_cap_sparse_mmap *sparse;
ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i);
if (ret < 0) {
@@ -911,15 +1014,59 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
- ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
- pci_addr, i, strerror(errno));
- free(reg);
- goto err_vfio_res;
- }
+ hdr = pci_vfio_info_cap(reg, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
+
+ if (dev->is_mdev && hdr != NULL) {
+ sparse = container_of(hdr,
+ struct vfio_region_info_cap_sparse_mmap,
+ header);
+
+ ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res,
+ sparse->areas, sparse->nr_areas, i, 0,
+ dev->device.numa_node);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "%s sparse mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ free(reg);
+ goto err_vfio_res;
+ }
- dev->mem_resource[i].addr = maps[i].addr;
+ dev->sparse_mem[i].size = reg->size;
+ dev->sparse_mem[i].nr_maps = vfio_res->maps[i].nr_areas;
+ dev->sparse_mem[i].areas = vfio_res->maps[i].areas;
+ } else {
+ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ free(reg);
+ goto err_vfio_res;
+ }
+
+ if (dev->is_mdev) {
+ struct pci_map *mdev_map = &maps[i];
+ mdev_map->nr_areas = 1;
+ mdev_map->areas = rte_zmalloc_socket(NULL,
+ sizeof(*mdev_map->areas),
+ RTE_CACHE_LINE_SIZE,
+ dev->device.numa_node);
+ if (maps[i].areas == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot allocate memory for sparse map areas\n");
+ goto err_vfio_res;
+ }
+ mdev_map->areas[0].addr = maps[i].addr;
+ mdev_map->areas[0].offset = 0;
+ mdev_map->areas[0].size = reg->size;
+ dev->sparse_mem[i].size = reg->size;
+ dev->sparse_mem[i].nr_maps = 1;
+ dev->sparse_mem[i].areas = mdev_map->areas;
+ } else {
+ maps[i].nr_areas = 0;
+ maps[i].areas = NULL;
+ dev->mem_resource[i].addr = maps[i].addr;
+ }
+ }
free(reg);
}
@@ -940,6 +1087,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
return 0;
err_vfio_res:
+ clean_up_pci_resource(vfio_res);
rte_free(vfio_res);
err_vfio_dev_fd:
rte_vfio_release_device(rte_pci_get_sysfs_path(),
@@ -960,7 +1108,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
struct mapped_pci_res_list *vfio_res_list =
RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
- struct pci_map *maps;
+ struct pci_map *maps, *cur;
dev->intr_handle.fd = -1;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
@@ -1012,14 +1160,49 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
maps = vfio_res->maps;
for (i = 0; i < vfio_res->nb_maps; i++) {
- ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
- pci_addr, i, strerror(errno));
- goto err_vfio_dev_fd;
+ cur = &maps[i];
+ if (cur->nr_areas > 1) {
+ struct vfio_region_sparse_mmap_area *areas;
+ uint32_t i;
+
+ areas = malloc(sizeof(*areas) * cur->nr_areas);
+ if (areas == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to alloc vfio areas for %s\n",
+ pci_addr);
+ goto err_vfio_dev_fd;
+ }
+
+ for (i = 0; i < cur->nr_areas; i++) {
+ areas[i].offset = cur->areas[i].offset;
+ areas[i].size = cur->areas[i].size;
+ }
+
+ ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res,
+ areas, cur->nr_areas, i, MAP_FIXED,
+ dev->device.numa_node);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "%s sparse mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ free(areas);
+ goto err_vfio_dev_fd;
+ }
+
+ free(areas);
+ } else {
+ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res,
+ i, MAP_FIXED);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "%s mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ goto err_vfio_dev_fd;
+ }
+
+ if (dev->is_mdev)
+ cur->areas[0].addr = cur->addr;
+ else
+ dev->mem_resource[i].addr = cur->addr;
}
- dev->mem_resource[i].addr = maps[i].addr;
}
/* we need save vfio_dev_fd, so it can be used during release */
@@ -1054,8 +1237,6 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
const char *pci_addr)
{
struct mapped_pci_resource *vfio_res = NULL;
- struct pci_map *maps;
- int i;
/* Get vfio_res */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
@@ -1079,19 +1260,7 @@ find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
RTE_LOG(INFO, EAL, "Releasing PCI mapped resource for %s\n",
pci_addr);
- maps = vfio_res->maps;
- for (i = 0; i < vfio_res->nb_maps; i++) {
-
- /*
- * We do not need to be aware of MSI-X table BAR mappings as
- * when mapping. Just using current maps array is enough
- */
- if (maps[i].addr) {
- RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
- pci_addr, maps[i].addr);
- pci_unmap_resource(maps[i].addr, maps[i].size);
- }
- }
+ clean_up_pci_resource(vfio_res);
return vfio_res;
}
@@ -110,6 +110,8 @@ struct pci_map {
uint64_t offset;
uint64_t size;
uint64_t phaddr;
+ uint32_t nr_areas;
+ struct rte_mem_map_area *areas;
};
struct pci_msix_table {
@@ -70,6 +70,18 @@ enum rte_pci_kernel_driver {
RTE_PCI_KDRV_NET_UIO, /* NetUIO for Windows */
};
+/**
+ * One mapped area of a sparsely mapped memory region.
+ */
+struct rte_mem_map_area {
+ void *addr; /**< Address the area is mapped at. */
+ uint64_t offset; /**< Offset of the area inside the region. */
+ uint64_t size; /**< Length of the area in bytes. */
+};
+
+/**
+ * A memory region described as a set of separately mapped areas.
+ */
+struct rte_sparse_mem_map {
+ uint64_t size; /**< Size of the whole region. */
+ uint32_t nr_maps; /**< Number of entries in areas. */
+ struct rte_mem_map_area *areas; /**< Array of nr_maps mapped areas. */
+};
+
/**
* A structure describing a PCI device.
*/
@@ -82,8 +94,12 @@ struct rte_pci_device {
};
uint8_t is_mdev; /**< True for mediated PCI device */
struct rte_pci_id id; /**< PCI ID. */
- struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
+ union {
+ struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
/**< PCI Memory Resource */
+ struct rte_sparse_mem_map sparse_mem[PCI_MAX_RESOURCE];
+ /**< Sparse Memory Map for Mdev */
+ };
struct rte_intr_handle intr_handle; /**< Interrupt handle */
struct rte_pci_driver *driver; /**< PCI driver used in probing */
uint16_t max_vfs; /**< sriov enable if not zero */