[v3] vfio: fix expanding DMA area in ppc64le

Message ID 20190713011532.9426-1-tyos@jp.ibm.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series [v3] vfio: fix expanding DMA area in ppc64le |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/Intel-compilation fail Compilation issues

Commit Message

Takeshi Yoshimura July 13, 2019, 1:15 a.m. UTC
  In ppc64le, expanding DMA areas always fail because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes report device busy errors due to the
existence of mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 99 +++++++++++++++++++----------
 1 file changed, 67 insertions(+), 32 deletions(-)
  

Comments

David Christensen July 16, 2019, 12:20 a.m. UTC | #1
> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
> 
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.
> 
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---
Acked-by: David Christensen <drc@linux.vnet.ibm.com>
  
Thomas Monjalon July 16, 2019, 10:56 a.m. UTC | #2
16/07/2019 02:20, David Christensen:
> > In ppc64le, expanding DMA areas always fail because we cannot remove
> > a DMA window. As a result, we cannot allocate more than one memseg in
> > ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> > the mapped DMA before removing the window. This patch fixes this
> > incorrect behavior.
> > 
> > I also fixed the order of ioctl for unregister and unmap. The ioctl
> > for unregister sometimes report device busy errors due to the
> > existence of mapped area.
> > 
> > Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> > ---
> Acked-by: David Christensen <drc@linux.vnet.ibm.com>

Applied, thanks
  

Patch

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index fadef427f..ed04231b1 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -1354,14 +1354,6 @@  vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1374,28 +1366,56 @@  vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t addr_64;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *param = arg;
 
-	if (msl->external)
+	if (msl->external || ms->addr_64 == param->addr_64)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *param = arg;
+
+	if (msl->external || ms->addr_64 == param->addr_64)
+		return 0;
+
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
+	uint64_t addr_64;
 };
+
 static int
 vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
@@ -1406,6 +1426,10 @@  vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 	if (msl->external)
 		return 0;
 
+	/* do not iterate ms we haven't mapped yet  */
+	if (param->addr_64 && ms->addr_64 == param->addr_64)
+		return 0;
+
 	if (max > param->window_size) {
 		param->hugepage_sz = ms->hugepage_sz;
 		param->window_size = max;
@@ -1503,6 +1527,7 @@  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
 	/* check if window size needs to be adjusted */
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = vaddr;
 
 	/* we're inside a callback so use thread-unsafe version */
 	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
@@ -1516,7 +1541,7 @@  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
 		uint64_t max = user_mem_maps->maps[i].iova +
 				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		param.window_size = RTE_MAX(param.window_size, max);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
@@ -1525,9 +1550,33 @@  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	create.levels = 1;
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param remap_param = {
+				.vfio_container_fd = vfio_container_fd,
+				.addr_64 = vaddr,
+			};
+
+			/* release all maps before recreating the window */
+			if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&remap_param) < 0) {
+				RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
+				ret = -1;
+				goto out;
+			}
+			/* release all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not release user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
@@ -1537,7 +1586,7 @@  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&remap_param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1555,23 +1604,8 @@  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1599,6 +1633,7 @@  vfio_spapr_dma_map(int vfio_container_fd)
 	struct spapr_walk_param param;
 
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = 0UL;
 
 	/* create DMA window from 0 to max(phys_addr + len) */
 	rte_memseg_walk(vfio_spapr_window_size_walk, &param);