Message ID | 20220119151016.9970-3-xuan.ding@intel.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Maxime Coquelin |
Series | vhost: fix async address mapping |
Context | Check | Description |
---|---|---|
ci/Intel-compilation | fail | Compilation issues |
ci/intel-Testing | success | Testing PASS |
ci/iol-abi-testing | success | Testing PASS |
ci/iol-aarch64-compile-testing | success | Testing PASS |
ci/iol-aarch64-unit-testing | success | Testing PASS |
ci/iol-intel-Functional | success | Functional Testing PASS |
ci/iol-intel-Performance | success | Performance Testing PASS |
ci/iol-x86_64-compile-testing | success | Testing PASS |
ci/iol-x86_64-unit-testing | success | Testing PASS |
ci/github-robot: build | success | github build: passed |
ci/iol-broadcom-Performance | success | Performance Testing PASS |
ci/iol-mellanox-Performance | success | Performance Testing PASS |
ci/iol-broadcom-Functional | success | Functional Testing PASS |
ci/checkpatch | success | coding style OK |
On 1/19/22 16:10, xuan.ding@intel.com wrote:
> From: Xuan Ding <xuan.ding@intel.com>
>
> When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> which requires page by page mapping for DMA devices. To be consistent,
> this patch implements page by page mapping instead of mapping at the
> region granularity for both IOVA as VA and PA mode.
>
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 116 ++++++++++++++++++++---------------------
>  2 files changed, 57 insertions(+), 60 deletions(-)
>
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index ca7f58039d..9521ae56da 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>  struct guest_page {
>  	uint64_t guest_phys_addr;
>  	uint64_t host_iova;
> +	uint64_t host_user_addr;
>  	uint64_t size;
>  };
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 95c9df697e..48c08716ba 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
>  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>  }
>
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
>  {
> -	uint64_t host_iova;
>  	int ret = 0;
> -
> -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +	uint32_t i;
> +	struct guest_page *page;

Add new line.

>  	if (do_map) {
> -		/* Add mapped region into the default container of DPDK. */
> -		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -				region->host_user_addr,
> -				host_iova,
> -				region->size);
> -		if (ret) {
> -			/*
> -			 * DMA device may bind with kernel driver, in this case,
> -			 * we don't need to program IOMMU manually. However, if no
> -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> -			 * module is loaded, the API will still be called and return
> -			 * with ENODEV/ENOSUP.
> -			 *
> -			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
> -			 * situations(vfio either unsupported, or supported
> -			 * but no devices found). Either way, no mappings could be
> -			 * performed. We treat it as normal case in async path.
> -			 */
> -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -			return 0;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +					page->host_user_addr,
> +					page->host_iova,
> +					page->size);
> +			if (ret) {
> +				/*
> +				 * DMA device may bind with kernel driver, in this case,
> +				 * we don't need to program IOMMU manually. However, if no
> +				 * device is bound with vfio/uio in DPDK, and vfio kernel
> +				 * module is loaded, the API will still be called and return
> +				 * with ENODEV.
> +				 *
> +				 * DPDK vfio only returns ENODEV in very similar situations
> +				 * (vfio either unsupported, or supported but no devices found).
> +				 * Either way, no mappings could be performed. We treat it as
> +				 * normal case in async path. This is a workaround.
> +				 */
> +				if (rte_errno == ENODEV)
> +					return;
> +
> +				/* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
> +				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +			}
>  		}
>
>  	} else {
> -		/* Remove mapped region from the default container of DPDK. */
> -		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -				region->host_user_addr,
> -				host_iova,
> -				region->size);
> -		if (ret) {
> -			/* like DMA map, ignore the kernel driver case when unmap. */
> -			if (rte_errno == EINVAL)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -			return ret;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +					page->host_user_addr,
> +					page->host_iova,
> +					page->size);
> +			if (ret) {
> +				/* like DMA map, ignore the kernel driver case when unmap. */
> +				if (rte_errno == EINVAL)
> +					return;
> +
> +				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +			}
>  		}
>  	}
> -
> -	return ret;
>  }
>
>  static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>  	if (!dev || !dev->mem)
>  		return;
>
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, false);
> +
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -				async_dma_map(reg, false);
> -
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
>
>  static int
>  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -		uint64_t host_iova, uint64_t size)
> +		uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
>  {
>  	struct guest_page *page, *last_page;
>  	struct guest_page *old_pages;
> @@ -999,8 +998,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>  	if (dev->nr_guest_pages > 0) {
>  		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
>  		/* merge if the two pages are continuous */
> -		if (host_iova == last_page->host_iova +
> -				last_page->size) {
> +		if (host_iova == last_page->host_iova + last_page->size
> +			&& guest_phys_addr == last_page->guest_phys_addr + last_page->size
> +			&& host_user_addr == last_page->host_user_addr + last_page->size) {

Indentation looks wrong.

>  			last_page->size += size;
>  			return 0;
>  		}
> @@ -1009,6 +1009,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>  	page = &dev->guest_pages[dev->nr_guest_pages++];
>  	page->guest_phys_addr = guest_phys_addr;
>  	page->host_iova = host_iova;
> +	page->host_user_addr = host_user_addr;
>  	page->size = size;
>
>  	return 0;
> @@ -1028,7 +1029,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>  	size = page_size - (guest_phys_addr & (page_size - 1));
>  	size = RTE_MIN(size, reg_size);
>
> -	if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0)
> +	if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> +			host_user_addr, size) < 0)
>  		return -1;
>
>  	host_user_addr += size;
> @@ -1040,7 +1042,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>  		host_iova = rte_mem_virt2iova((void *)(uintptr_t)
>  				host_user_addr);
>  		if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> -				size) < 0)
> +				host_user_addr, size) < 0)
>  			return -1;
>
>  		host_user_addr += size;
> @@ -1215,7 +1217,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> -	int ret;
>
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1274,14 +1275,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
>  			return -1;
>  		}
> -
> -		if (rte_vfio_is_enabled("vfio")) {
> -			ret = async_dma_map(region, true);
> -			if (ret) {
> -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> -				return -1;
> -			}
> -		}
>  	}
>
>  	VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1413,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
>  		dev->mem->nregions++;
>  	}
>
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, true);
> +
>  	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>  		goto free_mem_table;
>

Overall, the patch looks good. Please fix the small nits & add Fixes tag
and cc stable on patch 1.

Thanks,
Maxime
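The condition discussed above merges a newly scanned guest page into the previous entry only when the guest physical address, the host IOVA, and the host virtual address are all contiguous; if any one of the three is discontinuous, the page must stay a separate entry so it can later be DMA-mapped on its own. A minimal standalone sketch of that logic (simplified from the patch; `try_merge_page` and the local struct are illustrative stand-ins, not the actual DPDK code):

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for DPDK's struct guest_page after this patch:
 * the host user address is now tracked alongside the IOVA. */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;
	uint64_t host_user_addr;
	uint64_t size;
};

/* Extend the previously recorded page only when all three address spaces
 * (guest physical, host IOVA, host virtual) continue without a gap. */
bool
try_merge_page(struct guest_page *last, uint64_t guest_phys_addr,
		uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
{
	if (host_iova == last->host_iova + last->size &&
	    guest_phys_addr == last->guest_phys_addr + last->size &&
	    host_user_addr == last->host_user_addr + last->size) {
		last->size += size;
		return true;
	}
	return false;
}
```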
On 1/19/22 16:10, xuan.ding@intel.com wrote:
> From: Xuan Ding <xuan.ding@intel.com>
>
> When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> which requires page by page mapping for DMA devices. To be consistent,
> this patch implements page by page mapping instead of mapping at the
> region granularity for both IOVA as VA and PA mode.
>
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
>  lib/vhost/vhost.h      |   1 +
>  lib/vhost/vhost_user.c | 116 ++++++++++++++++++++---------------------
>  2 files changed, 57 insertions(+), 60 deletions(-)
>

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index ca7f58039d..9521ae56da 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -355,6 +355,7 @@ struct vring_packed_desc_event {
 struct guest_page {
 	uint64_t guest_phys_addr;
 	uint64_t host_iova;
+	uint64_t host_user_addr;
 	uint64_t size;
 };
 
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 95c9df697e..48c08716ba 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -143,57 +143,56 @@ get_blk_size(int fd)
 	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
-static int
-async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+static void
+async_dma_map(struct virtio_net *dev, bool do_map)
 {
-	uint64_t host_iova;
 	int ret = 0;
-
-	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	uint32_t i;
+	struct guest_page *page;
 	if (do_map) {
-		/* Add mapped region into the default container of DPDK. */
-		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
-				region->host_user_addr,
-				host_iova,
-				region->size);
-		if (ret) {
-			/*
-			 * DMA device may bind with kernel driver, in this case,
-			 * we don't need to program IOMMU manually. However, if no
-			 * device is bound with vfio/uio in DPDK, and vfio kernel
-			 * module is loaded, the API will still be called and return
-			 * with ENODEV/ENOSUP.
-			 *
-			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
-			 * situations(vfio either unsupported, or supported
-			 * but no devices found). Either way, no mappings could be
-			 * performed. We treat it as normal case in async path.
-			 */
-			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
-			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
-			return 0;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+					page->host_user_addr,
+					page->host_iova,
+					page->size);
+			if (ret) {
+				/*
+				 * DMA device may bind with kernel driver, in this case,
+				 * we don't need to program IOMMU manually. However, if no
+				 * device is bound with vfio/uio in DPDK, and vfio kernel
+				 * module is loaded, the API will still be called and return
+				 * with ENODEV.
+				 *
+				 * DPDK vfio only returns ENODEV in very similar situations
+				 * (vfio either unsupported, or supported but no devices found).
+				 * Either way, no mappings could be performed. We treat it as
+				 * normal case in async path. This is a workaround.
+				 */
+				if (rte_errno == ENODEV)
+					return;
+
+				/* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+			}
 		}
 
 	} else {
-		/* Remove mapped region from the default container of DPDK. */
-		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
-				region->host_user_addr,
-				host_iova,
-				region->size);
-		if (ret) {
-			/* like DMA map, ignore the kernel driver case when unmap. */
-			if (rte_errno == EINVAL)
-				return 0;
-
-			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
-			return ret;
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+					page->host_user_addr,
+					page->host_iova,
+					page->size);
+			if (ret) {
+				/* like DMA map, ignore the kernel driver case when unmap. */
+				if (rte_errno == EINVAL)
+					return;
+
+				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			}
 		}
 	}
-
-	return ret;
 }
 
 static void
@@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
 	if (!dev || !dev->mem)
 		return;
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, false);
+
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
-			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
-				async_dma_map(reg, false);
-
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
 
 static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
-		uint64_t host_iova, uint64_t size)
+		uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
 {
 	struct guest_page *page, *last_page;
 	struct guest_page *old_pages;
@@ -999,8 +998,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	if (dev->nr_guest_pages > 0) {
 		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
 		/* merge if the two pages are continuous */
-		if (host_iova == last_page->host_iova +
-				last_page->size) {
+		if (host_iova == last_page->host_iova + last_page->size
+			&& guest_phys_addr == last_page->guest_phys_addr + last_page->size
+			&& host_user_addr == last_page->host_user_addr + last_page->size) {
 			last_page->size += size;
 			return 0;
 		}
@@ -1009,6 +1009,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 	page = &dev->guest_pages[dev->nr_guest_pages++];
 	page->guest_phys_addr = guest_phys_addr;
 	page->host_iova = host_iova;
+	page->host_user_addr = host_user_addr;
 	page->size = size;
 
 	return 0;
@@ -1028,7 +1029,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 	size = page_size - (guest_phys_addr & (page_size - 1));
 	size = RTE_MIN(size, reg_size);
 
-	if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0)
+	if (add_one_guest_page(dev, guest_phys_addr, host_iova,
+			host_user_addr, size) < 0)
 		return -1;
 
 	host_user_addr += size;
@@ -1040,7 +1042,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
 		host_iova = rte_mem_virt2iova((void *)(uintptr_t)
 				host_user_addr);
 		if (add_one_guest_page(dev, guest_phys_addr, host_iova,
-				size) < 0)
+				host_user_addr, size) < 0)
 			return -1;
 
 		host_user_addr += size;
@@ -1215,7 +1217,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
-	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1274,14 +1275,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
 			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
 			return -1;
 		}
-
-		if (rte_vfio_is_enabled("vfio")) {
-			ret = async_dma_map(region, true);
-			if (ret) {
-				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
-				return -1;
-			}
-		}
 	}
 
 	VHOST_LOG_CONFIG(INFO,
@@ -1420,6 +1413,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		dev->mem->nregions++;
 	}
 
+	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+		async_dma_map(dev, true);
+
 	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
 		goto free_mem_table;
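The net effect of the patch is that async vhost programs the IOMMU per tracked guest page rather than per memory region, which is what makes IOVA-as-PA work: the IOVAs backing one virtually contiguous region are not guaranteed to be contiguous themselves. A condensed sketch of that loop, assuming a DPDK build environment (`map_guest_pages` and its local `struct guest_page` are illustrative stand-ins, not the patch's actual code):

```c
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#include <rte_errno.h>
#include <rte_vfio.h>

/* Illustrative copy of the page descriptor used by the patch. */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_iova;
	uint64_t host_user_addr;
	uint64_t size;
};

/*
 * Program (or tear down) one VFIO DMA mapping per tracked page in DPDK's
 * default container. Mapping page by page is required when IOVA is the
 * physical address, because consecutive pages of one region may have
 * non-contiguous IOVAs.
 */
void
map_guest_pages(struct guest_page *pages, uint32_t nr_pages, bool do_map)
{
	uint32_t i;

	for (i = 0; i < nr_pages; i++) {
		struct guest_page *page = &pages[i];
		int ret;

		if (do_map)
			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
					page->host_user_addr, page->host_iova,
					page->size);
		else
			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
					page->host_user_addr, page->host_iova,
					page->size);

		if (ret) {
			/*
			 * ENODEV on map (EINVAL on unmap) means no device is
			 * actually bound to vfio, so there is nothing to
			 * program; the patch treats this as a normal case
			 * and stops early.
			 */
			if ((do_map && rte_errno == ENODEV) ||
					(!do_map && rte_errno == EINVAL))
				return;
			/* Other failures are logged by the patch but do not
			 * abort VHOST_USER_SET_MEM_TABLE handling. */
		}
	}
}
```

In the patch itself, the equivalent loop walks `dev->guest_pages`, which `add_one_guest_page()` fills in with the host user address recorded next to the IOVA; it is invoked from `vhost_user_set_mem_table()` for mapping and from `free_mem_region()` for unmapping.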