[v2] gpudev: use CPU map functionality in comm list

Message ID 20220222002233.21076-1-eagostini@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series [v2] gpudev: use CPU map functionality in comm list |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

Elena Agostini Feb. 22, 2022, 12:22 a.m. UTC
  From: Elena Agostini <eagostini@nvidia.com>

rte_gpu_mem_cpu_map() exposes a GPU memory area to the CPU.
In gpudev communication list this is useful to store the
status flag.

A communication list status flag allocated on GPU memory
and mapped for CPU visibility can be updated by CPU and polled
by a GPU workload.

The polling operation is more frequent than the CPU update operation.
Having the status flag in GPU memory reduces the GPU workload polling
latency.

If the CPU mapping feature is not enabled, the status flag resides
in CPU memory that is registered with the GPU device so it is
visible from the GPU.

To facilitate interaction with the status flag, this patch
also provides set/get functions for it.

Signed-off-by: Elena Agostini <eagostini@nvidia.com>

---

Changelog:
- Address checkpatch issues
---
 app/test-gpudev/main.c           |   8 ++-
 doc/guides/prog_guide/gpudev.rst |   2 +-
 lib/gpudev/gpudev.c              | 114 +++++++++++++++++++++++++++++--
 lib/gpudev/rte_gpudev.h          |  49 ++++++++++++-
 lib/gpudev/version.map           |   2 +
 5 files changed, 165 insertions(+), 10 deletions(-)
  

Comments

Thomas Monjalon Feb. 22, 2022, 6:46 p.m. UTC | #1
22/02/2022 01:22, eagostini@nvidia.com:
> From: Elena Agostini <eagostini@nvidia.com>
> 
> rte_gpu_mem_cpu_map() exposes a GPU memory area to the CPU.
> In gpudev communication list this is useful to store the
> status flag.
> 
> A communication list status flag allocated on GPU memory
> and mapped for CPU visibility can be updated by CPU and polled
> by a GPU workload.
> 
> The polling operation is more frequent than the CPU update operation.
> Having the status flag in GPU memory reduces the GPU workload polling
> latency.
> 
> If CPU mapping feature is not enabled, status flag resides in
> CPU memory registered so it's visible from the GPU.
> 
> To facilitate the interaction with the status flag, this patch
> provides also the set/get functions for it.
> 
> Signed-off-by: Elena Agostini <eagostini@nvidia.com>

[...]
>   * @param ptr
> - *   Pointer to the GPU memory area to be unmapped.
> + *   Pointer to the memory area to be unmapped.

rebase issue: that reverts another commit sent the same day. I will skip

[...]
>  #endif /* RTE_GPUDEV_H */
> +

Will remove lines added at the end of 2 files.

Applied, thanks.
  

Patch

diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
index d4b8b8971d..f065e6cd81 100644
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -324,7 +324,13 @@  simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
 		 * consume(comm_list_item->pkt_list[idx].addr);
 		 */
 	}
-	comm_list_item->status = RTE_GPU_COMM_LIST_DONE;
+	/*
+	 * A real GPU workload function can't directly call rte_gpu_comm_set_status
+	 * because it's a CPU-only function.
+	 * A real GPU workload should implement the content
+	 * of rte_gpu_comm_set_status() in GPU specific code.
+	 */
+	rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_DONE);
 
 	return 0;
 }
diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst
index 6223207a33..0ccf734dc0 100644
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@@ -216,7 +216,7 @@  about how to use functions in this library in case of a CUDA application.
 
        /* GPU kernel keeps checking this flag to know if it has to quit or wait for more packets. */
        while (*quit_flag_ptr == 0) {
-           if (comm_list[comm_list_index]->status != RTE_GPU_COMM_LIST_READY)
+           if (comm_list[comm_list_index]->status_d[0] != RTE_GPU_COMM_LIST_READY)
                continue;
 
            if (threadIdx.x < comm_list[comm_list_index]->num_pkts)
diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c
index ce92d63257..293330fed5 100644
--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@@ -10,6 +10,7 @@ 
 #include <rte_malloc.h>
 #include <rte_errno.h>
 #include <rte_log.h>
+#include <rte_eal_paging.h>
 
 #include "rte_gpudev.h"
 #include "gpudev_driver.h"
@@ -847,6 +848,46 @@  rte_gpu_comm_create_list(uint16_t dev_id,
 		return NULL;
 	}
 
+	/*
+	 * Use GPU memory CPU map feature if enabled in the driver
+	 * to allocate the status flags of the list.
+	 * Allocating this flag in GPU memory will reduce
+	 * the latency when GPU workload is polling this flag.
+	 */
+	comm_list[0].status_d = rte_gpu_mem_alloc(dev_id,
+			sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
+			rte_mem_page_size());
+	if (ret < 0) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	comm_list[0].status_h = rte_gpu_mem_cpu_map(dev_id,
+			sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
+			comm_list[0].status_d);
+	if (comm_list[0].status_h == NULL) {
+		/*
+		 * If CPU mapping is not supported by driver
+		 * use regular CPU registered memory.
+		 */
+		comm_list[0].status_h = rte_zmalloc(NULL,
+				sizeof(enum rte_gpu_comm_list_status) * num_comm_items, 0);
+		if (comm_list[0].status_h == NULL) {
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+
+		ret = rte_gpu_mem_register(dev_id,
+				sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
+				comm_list[0].status_h);
+		if (ret < 0) {
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+
+		comm_list[0].status_d = comm_list[0].status_h;
+	}
+
 	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
 		comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
 				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
@@ -863,7 +904,6 @@  rte_gpu_comm_create_list(uint16_t dev_id,
 			return NULL;
 		}
 
-		RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
 		comm_list[idx_l].num_pkts = 0;
 		comm_list[idx_l].dev_id = dev_id;
 
@@ -873,6 +913,17 @@  rte_gpu_comm_create_list(uint16_t dev_id,
 			rte_errno = ENOMEM;
 			return NULL;
 		}
+
+		if (idx_l > 0) {
+			comm_list[idx_l].status_h = &(comm_list[0].status_h[idx_l]);
+			comm_list[idx_l].status_d = &(comm_list[0].status_d[idx_l]);
+
+			ret = rte_gpu_comm_set_status(&comm_list[idx_l], RTE_GPU_COMM_LIST_FREE);
+			if (ret < 0) {
+				rte_errno = ENOMEM;
+				return NULL;
+			}
+		}
 	}
 
 	return comm_list;
@@ -910,6 +961,14 @@  rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
 		return -1;
 	}
 
+	ret = rte_gpu_mem_cpu_unmap(dev_id, comm_list[0].status_d);
+	if (ret == 0) {
+		rte_gpu_mem_free(dev_id, comm_list[0].status_d);
+	} else {
+		rte_gpu_mem_unregister(dev_id, comm_list[0].status_h);
+		rte_free(comm_list[0].status_h);
+	}
+
 	rte_free(comm_list);
 
 	return 0;
@@ -920,6 +979,7 @@  rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
 		struct rte_mbuf **mbufs, uint32_t num_mbufs)
 {
 	uint32_t idx;
+	int ret;
 
 	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
 			mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
@@ -943,7 +1003,39 @@  rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
 
 	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
 	rte_gpu_wmb(comm_list_item->dev_id);
-	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;
+	ret = rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_READY);
+	if (ret < 0) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	return 0;
+}
+
+int
+rte_gpu_comm_set_status(struct rte_gpu_comm_list *comm_list_item,
+		enum rte_gpu_comm_list_status status)
+{
+	if (comm_list_item == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	RTE_GPU_VOLATILE(comm_list_item->status_h[0]) = status;
+
+	return 0;
+}
+
+int
+rte_gpu_comm_get_status(struct rte_gpu_comm_list *comm_list_item,
+		enum rte_gpu_comm_list_status *status)
+{
+	if (comm_list_item == NULL || status == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	*status = RTE_GPU_VOLATILE(comm_list_item->status_h[0]);
 
 	return 0;
 }
@@ -952,14 +1044,21 @@  int
 rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
 {
 	uint32_t idx = 0;
+	enum rte_gpu_comm_list_status status;
+	int ret;
 
 	if (comm_list_item == NULL) {
 		rte_errno = EINVAL;
 		return -rte_errno;
 	}
 
-	if (RTE_GPU_VOLATILE(comm_list_item->status) ==
-			RTE_GPU_COMM_LIST_READY) {
+	ret = rte_gpu_comm_get_status(comm_list_item, &status);
+	if (ret < 0) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	if (status == RTE_GPU_COMM_LIST_READY) {
 		GPU_LOG(ERR, "packet list is still in progress");
 		rte_errno = EINVAL;
 		return -rte_errno;
@@ -974,9 +1073,14 @@  rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
 		comm_list_item->mbufs[idx] = NULL;
 	}
 
-	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
+	ret = rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_FREE);
+	if (ret < 0) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
 	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
 	rte_mb();
 
 	return 0;
 }
+
diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h
index 9802bff2a5..25ec55b4e4 100644
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@@ -124,8 +124,10 @@  struct rte_gpu_comm_list {
 	struct rte_gpu_comm_pkt *pkt_list;
 	/** Number of packets in the list. */
 	uint32_t num_pkts;
-	/** Status of the list. */
-	enum rte_gpu_comm_list_status status;
+	/** Status of the list. CPU pointer. */
+	enum rte_gpu_comm_list_status *status_h;
+	/** Status of the list. GPU pointer. */
+	enum rte_gpu_comm_list_status *status_d;
 };
 
 /**
@@ -489,7 +491,7 @@  void *rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr);
  * @param dev_id
  *   Reference device ID.
  * @param ptr
- *   Pointer to the GPU memory area to be unmapped.
+ *   Pointer to the memory area to be unmapped.
  *   NULL is a no-op accepted value.
  *
  * @return
@@ -679,6 +681,46 @@  __rte_experimental
 int rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
 		struct rte_mbuf **mbufs, uint32_t num_mbufs);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set status flag value of a communication list item.
+ *
+ * @param comm_list_item
+ *   Communication list item to query.
+ * @param status
+ *   Status value to set.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ */
+__rte_experimental
+int rte_gpu_comm_set_status(struct rte_gpu_comm_list *comm_list_item,
+		enum rte_gpu_comm_list_status status);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get status flag value of a communication list item.
+ *
+ * @param comm_list_item
+ *   Communication list item to query.
+ *   Input parameter.
+ * @param status
+ *   Communication list item status flag value.
+ *   Output parameter.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ */
+__rte_experimental
+int rte_gpu_comm_get_status(struct rte_gpu_comm_list *comm_list_item,
+		enum rte_gpu_comm_list_status *status);
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
@@ -701,3 +743,4 @@  int rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item);
 #endif
 
 #endif /* RTE_GPUDEV_H */
+
diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map
index 5bc5d154cd..b23e3fd6eb 100644
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@@ -12,8 +12,10 @@  EXPERIMENTAL {
 	rte_gpu_comm_destroy_flag;
 	rte_gpu_comm_destroy_list;
 	rte_gpu_comm_get_flag_value;
+	rte_gpu_comm_get_status;
 	rte_gpu_comm_populate_list_pkts;
 	rte_gpu_comm_set_flag;
+	rte_gpu_comm_set_status;
 	rte_gpu_count_avail;
 	rte_gpu_find_next;
 	rte_gpu_info_get;