diff mbox series

[v3,8/9] gpudev: add communication list

Message ID 20211009015349.9694-9-eagostini@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series GPU library | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Elena Agostini Oct. 9, 2021, 1:53 a.m. UTC
From: Elena Agostini <eagostini@nvidia.com>

In a heterogeneous computing system, processing is not done only on the CPU.
Some tasks can be delegated to devices working in parallel.
When mixing network activity with task processing, there may be a need
for the CPU and the device to communicate in order to synchronize
operations.

An example could be a receive-and-process application
where CPU is responsible for receiving packets in multiple mbufs
and the GPU is responsible for processing the content of those packets.

The purpose of this list is to provide a buffer in CPU memory visible
from the GPU that can be treated as a circular buffer
to let the CPU provide fundamental info about received packets to the GPU.

A possible use-case is described below.

CPU:
- Trigger some task on the GPU
- in a loop:
    - receive a number of packets
    - provide packets info to the GPU

GPU:
- Do some pre-processing
- Wait to receive a new set of packets to be processed

Layout of a communication list would be:

     -------
    |   0    | => pkt_list
    | status |
    | #pkts  |
     -------
    |   1    | => pkt_list
    | status |
    | #pkts  |
     -------
    |   2    | => pkt_list
    | status |
    | #pkts  |
     -------
    |  ....  | => pkt_list
     -------

Signed-off-by: Elena Agostini <eagostini@nvidia.com>
---
 app/test-gpudev/main.c                 | 103 +++++++++++++++
 doc/guides/prog_guide/gpudev.rst       |  16 +++
 doc/guides/rel_notes/release_21_11.rst |   2 +-
 lib/gpudev/gpudev.c                    | 165 +++++++++++++++++++++++++
 lib/gpudev/meson.build                 |   2 +
 lib/gpudev/rte_gpudev.h                | 129 +++++++++++++++++++
 lib/gpudev/version.map                 |   4 +
 7 files changed, 420 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
index 22f5c950b2..8f7ffa4c63 100644
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -227,6 +227,108 @@  create_update_comm_flag(uint16_t gpu_id)
 	return 0;
 }
 
+/**
+ * Stand-in for a GPU task: pretend to walk the packets of one
+ * communication list item, then flag the item as processed.
+ *
+ * Returns 0 on success, -1 if no item is given.
+ */
+static int
+simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
+{
+	int pkt = 0;
+
+	if (comm_list_item == NULL)
+		return -1;
+
+	while (pkt < num_pkts) {
+		/**
+		 * consume(comm_list_item->pkt_list[pkt].addr);
+		 */
+		pkt++;
+	}
+
+	/* Tell the CPU side that the whole item has been consumed. */
+	comm_list_item->status = RTE_GPU_COMM_LIST_DONE;
+
+	return 0;
+}
+
+/**
+ * Exercise the communication list API end to end:
+ * create a list, populate its first item with fake mbufs, check that
+ * cleanup is refused while the item is still READY, simulate the GPU
+ * consuming the packets, clean the item up and destroy the list.
+ *
+ * Every exit path releases the fake mbufs and the communication list.
+ *
+ * Returns 0 if the test passed, -1 otherwise.
+ */
+static int
+create_update_comm_list(uint16_t gpu_id)
+{
+	enum { NUM_FAKE_MBUFS = 10 };
+	struct rte_gpu_comm_list *comm_list;
+	struct rte_mbuf *mbufs[NUM_FAKE_MBUFS] = { NULL };
+	uint32_t num_comm_items = 1024;
+	int result = -1;
+	int ret = 0;
+	int i;
+
+	printf("\n=======> TEST: Communication list\n");
+
+	comm_list = rte_gpu_comm_create_list(gpu_id, num_comm_items);
+	if (comm_list == NULL) {
+		/*
+		 * NOTE(review): ret is still 0 here, the failure reason
+		 * is reported by the library through rte_errno.
+		 */
+		fprintf(stderr, "rte_gpu_comm_create_list returned error %d\n", ret);
+		return -1;
+	}
+
+	/**
+	 * Simulate DPDK receive functions like rte_eth_rx_burst().
+	 * rte_zmalloc() already returns zeroed memory, so no extra
+	 * memset is needed.
+	 */
+	for (i = 0; i < NUM_FAKE_MBUFS; i++) {
+		mbufs[i] = rte_zmalloc(NULL, sizeof(struct rte_mbuf), 0);
+		if (mbufs[i] == NULL) {
+			fprintf(stderr, "Failed to allocate fake mbufs in CPU memory.\n");
+			goto cleanup;
+		}
+	}
+
+	/**
+	 * Populate just the first item of the list
+	 */
+	ret = rte_gpu_comm_populate_list_pkts(&(comm_list[0]), mbufs, NUM_FAKE_MBUFS);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_populate_list_pkts returned error %d\n", ret);
+		goto cleanup;
+	}
+
+	/* The item is READY but not consumed yet: cleanup must be refused. */
+	ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
+	if (ret == 0) {
+		fprintf(stderr, "rte_gpu_comm_cleanup_list erroneusly cleaned the list even if packets have not beeing consumed yet\n");
+		goto cleanup;
+	} else {
+		fprintf(stderr, "rte_gpu_comm_cleanup_list correctly didn't clean up the packets because they have not beeing consumed yet\n");
+	}
+
+	/**
+	 * Simulate a GPU tasks going through the packet list to consume
+	 * mbufs packets and release them
+	 */
+	simulate_gpu_task(&(comm_list[0]), NUM_FAKE_MBUFS);
+
+	/**
+	 * Packets have been consumed, now the communication item
+	 * and the related mbufs can be all released
+	 */
+	ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret);
+		goto cleanup;
+	}
+
+	ret = rte_gpu_comm_destroy_list(comm_list, num_comm_items);
+	/* Whatever the outcome, do not try to destroy the list a second time. */
+	comm_list = NULL;
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_destroy_list returned error %d\n", ret);
+		goto cleanup;
+	}
+
+	printf("\nCommunication list test passed!\n");
+	result = 0;
+
+cleanup:
+	if (comm_list != NULL)
+		(void)rte_gpu_comm_destroy_list(comm_list, num_comm_items);
+
+	/* Slots never allocated are still NULL; rte_free(NULL) does nothing. */
+	for (i = 0; i < NUM_FAKE_MBUFS; i++)
+		rte_free(mbufs[i]);
+
+	return result;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -282,6 +384,7 @@  main(int argc, char **argv)
 	 * Communication items test
 	 */
 	create_update_comm_flag(gpu_id);
+	create_update_comm_list(gpu_id);
 
 	/* clean up the EAL */
 	rte_eal_cleanup();
diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst
index e0db627aed..cbaec5a1e4 100644
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@@ -86,3 +86,19 @@  that's waiting to receive a signal from the CPU
 to move forward with the execution.
 The communication flag allocates a CPU memory GPU-visible ``uint32_t`` flag
 that can be used by the CPU to communicate with a GPU task.
+
+Communication list
+~~~~~~~~~~~~~~~~~~
+
+By default, DPDK pulls free mbufs from a mempool to receive packets.
+Best practice, especially in a multithreaded application,
+is to not make any assumption on which mbufs will be used
+to receive the next bursts of packets.
+Considering an application with a GPU memory mempool
+attached to a receive queue having some task waiting on the GPU
+to receive a new burst of packets to be processed,
+there is the need to communicate from the CPU
+the list of mbuf payload addresses where received packets have been stored.
+The ``rte_gpu_comm_*()`` functions are responsible for creating a list of packets
+that can be populated with receive mbuf payload addresses
+and communicated to the task running on the GPU.
diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index 59ab1a1920..0c6d92a269 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -66,7 +66,7 @@  New Features
 
   * Device information
   * Memory management
-  * Communication flag
+  * Communication flag & list
 
 * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
 
diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c
index 827e29d8f6..3cfde97e3c 100644
--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@@ -737,3 +737,168 @@  rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
 
 	return 0;
 }
+
+/*
+ * Allocate an array of num_comm_items communication list items and,
+ * for each item, a packet list of RTE_GPU_COMM_LIST_PKTS_MAX entries.
+ * Both the array and every packet list are registered with the device
+ * through rte_gpu_register().
+ *
+ * Returns the array on success. On failure returns NULL with rte_errno
+ * set (EINVAL, ENODEV or ENOMEM); everything allocated or registered
+ * so far is unregistered and freed before returning.
+ */
+struct rte_gpu_comm_list *
+rte_gpu_comm_create_list(uint16_t dev_id,
+		uint32_t num_comm_items)
+{
+	const size_t pkt_list_size =
+		sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX;
+	struct rte_gpu_comm_list *comm_list;
+	size_t comm_list_size;
+	uint32_t idx_l;
+	int ret;
+	struct rte_gpu *dev;
+
+	if (num_comm_items == 0) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	dev = gpu_get_by_id(dev_id);
+	if (dev == NULL) {
+		GPU_LOG(ERR, "create communication list for invalid device ID %d",
+				dev_id);
+		rte_errno = ENODEV;
+		return NULL;
+	}
+
+	comm_list_size = sizeof(struct rte_gpu_comm_list) * num_comm_items;
+	comm_list = rte_zmalloc(NULL, comm_list_size, 0);
+	if (comm_list == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	ret = rte_gpu_register(dev_id, comm_list_size, comm_list);
+	if (ret < 0) {
+		/* Not registered: only the allocation has to be undone. */
+		rte_free(comm_list);
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
+		comm_list[idx_l].pkt_list = rte_zmalloc(NULL, pkt_list_size, 0);
+		if (comm_list[idx_l].pkt_list == NULL)
+			goto error;
+
+		ret = rte_gpu_register(dev_id, pkt_list_size,
+				comm_list[idx_l].pkt_list);
+		if (ret < 0) {
+			/* This item was allocated but never registered. */
+			rte_free(comm_list[idx_l].pkt_list);
+			comm_list[idx_l].pkt_list = NULL;
+			goto error;
+		}
+
+		RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
+		comm_list[idx_l].num_pkts = 0;
+		comm_list[idx_l].dev_id = dev_id;
+	}
+
+	return comm_list;
+
+error:
+	/* Items [0, idx_l) are fully set up: unwind them in reverse order. */
+	while (idx_l-- > 0) {
+		(void)rte_gpu_unregister(dev_id, comm_list[idx_l].pkt_list);
+		rte_free(comm_list[idx_l].pkt_list);
+	}
+	(void)rte_gpu_unregister(dev_id, comm_list);
+	rte_free(comm_list);
+
+	/* Set last so that the unwinding calls cannot overwrite it. */
+	rte_errno = ENOMEM;
+	return NULL;
+}
+
+/*
+ * Unregister and free every per-item packet list, then unregister and
+ * free the communication list array itself.
+ *
+ * The device ID is read from the first item of the list.
+ * Stops at the first rte_gpu_unregister() failure, leaving the remaining
+ * items untouched.
+ *
+ * Returns 0 on success, -rte_errno (EINVAL) otherwise.
+ */
+int
+rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
+		uint32_t num_comm_items)
+{
+	uint32_t idx_l;
+	int ret;
+	uint16_t dev_id;
+
+	if (comm_list == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	dev_id = comm_list[0].dev_id;
+
+	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
+		ret = rte_gpu_unregister(dev_id, comm_list[idx_l].pkt_list);
+		if (ret < 0) {
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+
+		rte_free(comm_list[idx_l].pkt_list);
+	}
+
+	ret = rte_gpu_unregister(dev_id, comm_list);
+	if (ret < 0) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	rte_free(comm_list);
+
+	return 0;
+}
+
+/*
+ * Fill the packet list of one communication list item with the data
+ * address, length and mbuf pointer of each given mbuf, then publish
+ * the item by storing num_pkts followed by the READY status.
+ *
+ * The current status of the item is not checked before it is overwritten.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ * - EINVAL if a pointer argument is NULL, the item has no packet list,
+ *   or num_mbufs exceeds RTE_GPU_COMM_LIST_PKTS_MAX
+ * - ENOTSUP if an mbuf is chained; entries written before the
+ *   offending mbuf are left in place and the status is not changed
+ */
+int
+rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
+		struct rte_mbuf **mbufs, uint32_t num_mbufs)
+{
+	uint32_t idx;
+
+	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
+			mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	for (idx = 0; idx < num_mbufs; idx++) {
+		/* support only unchained mbufs */
+		if (unlikely((mbufs[idx]->nb_segs > 1) ||
+				(mbufs[idx]->next != NULL) ||
+				(mbufs[idx]->data_len != mbufs[idx]->pkt_len))) {
+			rte_errno = ENOTSUP;
+			return -rte_errno;
+		}
+		comm_list_item->pkt_list[idx].addr =
+				rte_pktmbuf_mtod_offset(mbufs[idx], uintptr_t, 0);
+		comm_list_item->pkt_list[idx].size = mbufs[idx]->pkt_len;
+		/* Kept so that rte_gpu_comm_cleanup_list() can free the mbuf. */
+		comm_list_item->pkt_list[idx].opaque = mbufs[idx];
+	}
+
+	/*
+	 * Publish the packet count first and the READY status second,
+	 * with rte_gpu_mbw() called after each store.
+	 * NOTE(review): presumably rte_gpu_mbw() is a write barrier that
+	 * makes the device observe these stores in this order - confirm.
+	 */
+	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
+	rte_gpu_mbw(comm_list_item->dev_id);
+	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;
+	rte_gpu_mbw(comm_list_item->dev_id);
+
+	return 0;
+}
+
+/*
+ * Reset one communication list item: clear its packet entries, release
+ * the mbufs recorded in them with rte_pktmbuf_free_bulk() and set the
+ * item back to the FREE status with zero packets.
+ *
+ * Entries are scanned from index 0 until the first one whose address
+ * is 0, or until RTE_GPU_COMM_LIST_PKTS_MAX entries have been visited.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ * - EINVAL if the item is NULL, has no packet list,
+ *   or is still in the READY status
+ */
+int
+rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
+{
+	struct rte_mbuf *mbufs[RTE_GPU_COMM_LIST_PKTS_MAX];
+	uint32_t idx = 0;
+
+	/* Same validation as in rte_gpu_comm_populate_list_pkts(). */
+	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	/* READY means the packets were published but not reported as processed. */
+	if (RTE_GPU_VOLATILE(comm_list_item->status) ==
+			RTE_GPU_COMM_LIST_READY) {
+		GPU_LOG(ERR, "packet list is still in progress");
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	for (idx = 0; idx < RTE_GPU_COMM_LIST_PKTS_MAX; idx++) {
+		/* An address of 0 marks the end of the populated entries. */
+		if (comm_list_item->pkt_list[idx].addr == 0)
+			break;
+
+		comm_list_item->pkt_list[idx].addr = 0;
+		comm_list_item->pkt_list[idx].size = 0;
+		mbufs[idx] = (struct rte_mbuf *) comm_list_item->pkt_list[idx].opaque;
+	}
+
+	/* idx is now the number of mbufs collected above. */
+	rte_pktmbuf_free_bulk(mbufs, idx);
+
+	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
+	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
+	rte_mb();
+
+	return 0;
+}
diff --git a/lib/gpudev/meson.build b/lib/gpudev/meson.build
index 608154817b..89a118f357 100644
--- a/lib/gpudev/meson.build
+++ b/lib/gpudev/meson.build
@@ -8,3 +8,5 @@  headers = files(
 sources = files(
         'gpudev.c',
 )
+
+deps += ['mbuf']
diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h
index 4a10a8bcf5..a13a4fc2c8 100644
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@@ -9,6 +9,7 @@ 
 #include <stdint.h>
 #include <stdbool.h>
 
+#include <rte_mbuf.h>
 #include <rte_bitops.h>
 #include <rte_compat.h>
 
@@ -41,6 +42,9 @@  extern "C" {
 /** Access variable as volatile. */
 #define RTE_GPU_VOLATILE(x) (*(volatile typeof(x)*)&(x))
 
+/** Max number of packets in the packet list of one communication list item. */
+#define RTE_GPU_COMM_LIST_PKTS_MAX 1024
+
 /** Store device info. */
 struct rte_gpu_info {
 	/** Unique identifier name. */
@@ -87,6 +91,43 @@  struct rte_gpu_comm_flag {
 	enum rte_gpu_comm_flag_type mtype;
 };
 
+/** Descriptor of one packet shared among CPU and device. */
+struct rte_gpu_comm_pkt {
+	/**
+	 * Address of the packet data in memory, filled by
+	 * rte_gpu_comm_populate_list_pkts() with
+	 * rte_pktmbuf_mtod_offset(mbuf, uintptr_t, 0).
+	 * rte_gpu_comm_cleanup_list() treats 0 as the end of the list.
+	 */
+	uintptr_t addr;
+	/** Size in bytes of the packet (filled from the mbuf pkt_len). */
+	size_t size;
+	/** Mbuf reference to release it in the rte_gpu_comm_cleanup_list(). */
+	void *opaque;
+};
+
+/** Possible status for the list of packets shared among CPU and device. */
+enum rte_gpu_comm_list_status {
+	/** Packet list can be filled with new mbufs, no one is using it. */
+	RTE_GPU_COMM_LIST_FREE = 0,
+	/**
+	 * Packet list has been filled with new mbufs and it's ready to be used.
+	 * rte_gpu_comm_cleanup_list() refuses to reset a list in this status.
+	 */
+	RTE_GPU_COMM_LIST_READY,
+	/** Packet list has been processed, it's ready to be freed. */
+	RTE_GPU_COMM_LIST_DONE,
+	/** Some error occurred during packet list processing. */
+	RTE_GPU_COMM_LIST_ERROR,
+};
+
+/**
+ * Item of a communication list: one list of packets
+ * together with its packet count and status flag.
+ * rte_gpu_comm_create_list() returns an array of these items.
+ */
+struct rte_gpu_comm_list {
+	/** Device that will use the communication list. */
+	uint16_t dev_id;
+	/**
+	 * List of packets populated by the CPU with a set of mbufs info.
+	 * Allocated with RTE_GPU_COMM_LIST_PKTS_MAX entries.
+	 */
+	struct rte_gpu_comm_pkt *pkt_list;
+	/** Number of packets in the list. */
+	uint32_t num_pkts;
+	/** Status of the list. */
+	enum rte_gpu_comm_list_status status;
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
@@ -513,6 +554,94 @@  __rte_experimental
 int rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag,
 		uint32_t *val);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Create a communication list that can be used to share packets
+ * between CPU and device.
+ * Each element of the list contains:
+ *  - a packet list of RTE_GPU_COMM_LIST_PKTS_MAX elements
+ *  - number of packets in the list
+ *  - a status flag to communicate if the packet list is FREE,
+ *    READY to be processed, DONE with processing.
+ *
+ * The list is allocated in CPU-visible memory
+ * and registered with the device.
+ * At creation time, every list is in FREE state.
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param num_comm_items
+ *   Number of items in the communication list. Must not be 0.
+ *
+ * @return
+ *   A pointer to the allocated list, otherwise NULL and rte_errno is set:
+ *   - EINVAL if invalid input params
+ *   - ENODEV if invalid dev_id
+ *   - ENOMEM if memory allocation or registration failed
+ */
+__rte_experimental
+struct rte_gpu_comm_list *rte_gpu_comm_create_list(uint16_t dev_id,
+		uint32_t num_comm_items);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Destroy a communication list.
+ * The packet list of every item and the list itself
+ * are unregistered from the device and freed.
+ *
+ * @param comm_list
+ *   Communication list to be destroyed.
+ * @param num_comm_items
+ *   Number of items in the communication list.
+ *
+ * @return
+ *   0 on success, a negative value otherwise and rte_errno is set:
+ *   - EINVAL if invalid input params or if unregistering memory failed
+ */
+__rte_experimental
+int rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
+		uint32_t num_comm_items);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Populate the packets list of the communication item
+ * with info from a list of mbufs.
+ * Status flag of that packet list is set to READY.
+ * The mbuf pointers are stored in the item so that
+ * rte_gpu_comm_cleanup_list() can release them later.
+ *
+ * @param comm_list_item
+ *   Communication list item to fill.
+ * @param mbufs
+ *   List of mbufs.
+ * @param num_mbufs
+ *   Number of mbufs, at most RTE_GPU_COMM_LIST_PKTS_MAX.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ *   - ENOTSUP if mbufs are chained (multiple segments)
+ */
+__rte_experimental
+int rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
+		struct rte_mbuf **mbufs, uint32_t num_mbufs);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Reset a communication list item to the original state.
+ * The status flag is set to FREE and mbufs are returned to the pool.
+ * An item whose status is still READY is not reset.
+ *
+ * @param comm_list_item
+ *   Communication list item to reset.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ *     or if the item status is still READY
+ */
+__rte_experimental
+int rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map
index 2fc039373a..45a35fa6e4 100644
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@@ -6,9 +6,13 @@  EXPERIMENTAL {
 	rte_gpu_callback_register;
 	rte_gpu_callback_unregister;
 	rte_gpu_close;
+	rte_gpu_comm_cleanup_list;
 	rte_gpu_comm_create_flag;
+	rte_gpu_comm_create_list;
 	rte_gpu_comm_destroy_flag;
+	rte_gpu_comm_destroy_list;
 	rte_gpu_comm_get_flag_value;
+	rte_gpu_comm_populate_list_pkts;
 	rte_gpu_comm_set_flag;
 	rte_gpu_count_avail;
 	rte_gpu_find_next;