diff mbox series

[v2,1/1] app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory

Message ID 20211118185613.3246-2-eagostini@nvidia.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers show
Series app/test-gpudev: introduce ethdev to rx/tx packets using GPU memory | expand

Checks

Context Check Description
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/github-robot: build success github build: passed
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Elena Agostini Nov. 18, 2021, 6:56 p.m. UTC
From: Elena Agostini <eagostini@nvidia.com>

This patch introduces ethdev in test-gpudev app to provide:
- an example to show how GPU memory can be used to send and receive packets
- a useful tool to measure network metrics when using GPU memory with
IO forwarding

With this feature test-gpudev can:
- RX packets in CPU or GPU memory
- Store packets in the gpudev communication list
- TX the received packets from the communication list

It's a simulation of a multi-core application.

Signed-off-by: Elena Agostini <eagostini@nvidia.com>
---
 app/test-gpudev/main.c | 477 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 458 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
index 250fba6427..18de023208 100644
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -10,6 +10,8 @@ 
 #include <stdarg.h>
 #include <errno.h>
 #include <getopt.h>
+#include <stdbool.h>
+#include <signal.h>
 
 #include <rte_common.h>
 #include <rte_malloc.h>
@@ -19,22 +21,90 @@ 
 #include <rte_ethdev.h>
 #include <rte_mempool.h>
 #include <rte_mbuf.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
 
 #include <rte_gpudev.h>
 
+#define GPU_PAGE_SHIFT   16
+#define GPU_PAGE_SIZE    (1UL << GPU_PAGE_SHIFT)
+#define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
+#define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
+
+#define MAX_QUEUES 16
+#define NUM_COMM_ITEMS 2048
+#define PKT_GAP 4
+
+// #define DEBUG_PRINT 1
+
 enum app_args {
 	ARG_HELP,
-	ARG_MEMPOOL
+	ARG_BURST,
+	ARG_GPU,
+	ARG_MBUFD,
+	ARG_MEMORY,
+	ARG_QUEUES,
+	ARG_TESTAPI,
+};
+
+enum mem_type {
+	MEMORY_CPU,
+	MEMORY_GPU
+};
+
+/* Options configurable from cmd line */
+static uint32_t conf_burst = 64;
+static uint16_t conf_gpu_id = 0;
+static enum mem_type conf_mtype = MEMORY_CPU;
+static uint32_t conf_mbuf_dataroom = 2048;
+static uint32_t conf_queues = 1;
+static bool conf_testapi = false;
+static uint16_t conf_nb_descriptors = 2048;
+
+/* Options statically defined */
+static uint32_t conf_nb_mbuf = 16384;
+static uint16_t conf_port_id = 0;
+
+/* Other variables */
+static volatile bool force_quit;
+static struct rte_mempool *mpool;
+static struct rte_pktmbuf_extmem ext_mem;
+struct rte_gpu_comm_list *comm_list_fwd[MAX_QUEUES];
+struct rte_ether_addr port_eth_addr;
+static struct rte_eth_conf port_conf = {
+	.rxmode = {
+		.mq_mode = ETH_MQ_RX_RSS,
+		.split_hdr_size = 0,
+		.offloads = 0,
+	},
+	.txmode = {
+		.mq_mode = ETH_MQ_TX_NONE,
+		.offloads = 0,
+	},
+	.rx_adv_conf = {
+		.rss_conf = {
+			.rss_key = NULL,
+			.rss_hf = ETH_RSS_IP
+		},
+	},
 };
 
 static void
 usage(const char *prog_name)
 {
-	printf("%s [EAL options] --\n",
+	printf("%s [EAL options] --\n"
+		" --help\n"
+		" --burst N: number of packets per rx burst\n"
+		" --gpu N: GPU ID to use\n"
+		" --memory N: external mempool memory type, 0 CPU, 1 GPU\n"
+		" --mbufd N: mbuf dataroom size\n"
+		" --testapi: test gpudev function\n"
+		" --queues N: number of RX queues\n",
 		prog_name);
 }
 
-static void
+static int
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
@@ -42,7 +112,19 @@  args_parse(int argc, char **argv)
 	int opt_idx;
 
 	static struct option lgopts[] = {
-		{ "help", 0, 0, ARG_HELP},
+		{ "help",  0, 0, ARG_HELP},
+		/* Packets per burst. */
+		{ "burst",  1, 0, ARG_BURST},
+		/* GPU to use. */
+		{ "gpu",  1, 0, ARG_GPU},
+		/* Type of memory for the mempool. */
+		{ "memory",  1, 0, ARG_MEMORY},
+		/* Size of mbufs dataroom */
+		{ "mbufd", 1, 0, ARG_MBUFD},
+		/* Number of RX queues */
+		{ "queues", 1, 0, ARG_QUEUES},
+		/* Test only gpudev functions */
+		{ "testapi", 0, 0, ARG_TESTAPI},
 		/* End of options */
 		{ 0, 0, 0, 0 }
 	};
@@ -51,6 +133,24 @@  args_parse(int argc, char **argv)
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
+		case ARG_BURST:
+			conf_burst = (uint32_t) atoi(optarg);
+			break;
+		case ARG_GPU:
+			conf_gpu_id = (uint16_t) atoi(optarg);
+			break;
+		case ARG_MEMORY:
+			conf_mtype = (atoi(optarg) == 1 ? MEMORY_GPU : MEMORY_CPU);
+			break;
+		case ARG_MBUFD:
+			conf_mbuf_dataroom = (uint32_t) atoi(optarg);
+			break;
+		case ARG_QUEUES:
+			conf_queues = (uint32_t) atoi(optarg);
+			break;
+		case ARG_TESTAPI:
+			conf_testapi = (atoi(optarg) == 1 ? true : false);
+			break;
 		case ARG_HELP:
 			usage(argv[0]);
 			break;
@@ -60,6 +160,19 @@  args_parse(int argc, char **argv)
 			break;
 		}
 	}
+
+	if (conf_queues > MAX_QUEUES) {
+		fprintf(stderr, "Can't support more than %d queues\n", MAX_QUEUES);
+		return -1;
+	}
+
+	if (conf_queues * 2 > rte_lcore_count()) {
+		fprintf(stderr, "Need to use at least %d cores to support %d RX/TX queues (EAL cores %d)\n",
+				conf_queues * 2, conf_queues, rte_lcore_count());
+		return -1;
+	}
+
+	return 0;
 }
 
 static int
@@ -342,13 +455,130 @@  create_update_comm_list(uint16_t gpu_id)
 	return -1;
 }
 
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+				signum);
+		force_quit = true;
+	}
+}
+
+static int
+rx_core(__rte_unused void *arg)
+{
+	uint32_t queue_id;
+	uint32_t nb_rx = 0;
+	int ret = 0;
+	int comm_list_item = 0;
+	struct rte_mbuf *rx_mbufs[RTE_GPU_COMM_LIST_PKTS_MAX];
+
+	queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2;
+
+	if (queue_id > conf_queues) {
+		fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n",
+				conf_queues*2, conf_queues);
+		RTE_GPU_VOLATILE(force_quit) = true;
+		return -1;
+	}
+
+	printf("RX core started on queue %d.\n", queue_id);
+
+	while (force_quit == false) {
+
+		nb_rx = 0;
+		while (nb_rx < RTE_GPU_COMM_LIST_PKTS_MAX &&
+				nb_rx < (conf_burst - PKT_GAP) &&
+				force_quit == false) {
+			nb_rx += rte_eth_rx_burst(conf_port_id, queue_id,
+					&(rx_mbufs[nb_rx]),
+					(conf_burst - nb_rx));
+		}
+
+		ret = rte_gpu_comm_populate_list_pkts(
+				&(comm_list_fwd[queue_id][comm_list_item]), rx_mbufs, nb_rx);
+		if (ret) {
+			fprintf(stderr,	"rte_gpu_comm_populate_list_pkts error %d.\n", ret);
+			return -1;
+		}
+
+#ifdef DEBUG_PRINT
+		printf("RX %d pkts from item %d\n",
+			comm_list_fwd[queue_id][comm_list_item].num_pkts,
+			comm_list_item);
+#endif
+
+		RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) = RTE_GPU_COMM_LIST_DONE;
+
+		comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS;
+	}
+
+	return 0;
+}
+
+static int
+tx_core(__rte_unused void *arg)
+{
+	uint32_t queue_id = 0;
+	uint32_t nb_tx = 0;
+	int ret = 0;
+	int comm_list_item = 0;
+
+	queue_id = (rte_lcore_index(rte_lcore_id()) - 1) / 2;
+	if (queue_id > conf_queues) {
+		fprintf(stderr, "Please specify the right list of cores (%d cores) in EAL params to support %d queues.\n",
+				conf_queues*2, conf_queues);
+		RTE_GPU_VOLATILE(force_quit) = true;
+		return -1;
+	}
+	printf("TX core started on queue %d.\n", queue_id);
+
+	while (force_quit == false) {
+
+#ifdef DEBUG_PRINT
+		printf("Waiting on item %d\n", comm_list_item);
+#endif
+		while (RTE_GPU_VOLATILE(comm_list_fwd[queue_id][comm_list_item].status) !=
+				RTE_GPU_COMM_LIST_DONE && force_quit == false);
+
+		nb_tx = 0;
+		while (nb_tx < comm_list_fwd[queue_id][comm_list_item].num_pkts) {
+			nb_tx += rte_eth_tx_burst(conf_port_id, queue_id,
+					&(comm_list_fwd[queue_id][comm_list_item].mbufs[nb_tx]),
+					comm_list_fwd[queue_id][comm_list_item].num_pkts - nb_tx);
+		}
+		rte_wmb();
+
+#ifdef DEBUG_PRINT
+		printf("TX %d/%d pkts from item %d\n",
+				nb_tx, comm_list_fwd[queue_id][comm_list_item].num_pkts,
+				comm_list_item);
+#endif
+		ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[queue_id][comm_list_item]));
+		if (ret) {
+			fprintf(stderr, "rte_gpu_comm_cleanup_list error %d.\n", ret);
+			return -1;
+		}
+
+		rte_mb();
+
+		comm_list_item = (comm_list_item+1) % NUM_COMM_ITEMS;
+	}
+
+	return 0;
+}
+
 int
 main(int argc, char **argv)
 {
-	int ret;
+	int ret, core_id;
 	int nb_gpus = 0;
+	int nb_ports = 0;
 	int16_t gpu_id = 0;
+	uint32_t idx_q = 0;
 	struct rte_gpu_info ginfo;
+	struct rte_eth_dev_info dev_info;
 
 	/* Init EAL. */
 	ret = rte_eal_init(argc, argv);
@@ -356,8 +586,14 @@  main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 	argc -= ret;
 	argv += ret;
-	if (argc > 1)
-		args_parse(argc, argv);
+	if (argc > 1) {
+		ret = args_parse(argc, argv);
+		if (ret) {
+			fprintf(stderr, "Input args error.\n");
+			goto exit;
+		}
+	}
+
 	argc -= ret;
 	argv += ret;
 
@@ -381,25 +617,228 @@  main(int argc, char **argv)
 
 	if (nb_gpus == 0) {
 		fprintf(stderr, "Need at least one GPU on the system to run the example\n");
-		return EXIT_FAILURE;
+		goto exit;
 	}
 
-	gpu_id = 0;
+	if (nb_gpus < conf_gpu_id) {
+		fprintf(stderr, "Not enough GPUs in the system (%d / %d).\n", nb_gpus, conf_gpu_id);
+		goto exit;
+	}
 
-	/**
-	 * Memory tests
-	 */
-	alloc_gpu_memory(gpu_id);
-	register_cpu_memory(gpu_id);
+	if (conf_testapi == true) {
+		/* Memory tests */
+		alloc_gpu_memory(gpu_id);
+		register_cpu_memory(gpu_id);
 
-	/**
-	 * Communication items test
-	 */
-	create_update_comm_flag(gpu_id);
-	create_update_comm_list(gpu_id);
+		/* Communication items test */
+		create_update_comm_flag(gpu_id);
+		create_update_comm_list(gpu_id);
+
+		goto exit;
+	}
+
+	force_quit = false;
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
+	nb_ports = rte_eth_dev_count_avail();
+	if (nb_ports == 0)
+		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+	ret = rte_eth_dev_info_get(conf_port_id, &dev_info);
+	if (ret) {
+		fprintf(stderr, "rte_eth_dev_info_get failed with %d.\n", ret);
+		goto exit;
+	}
+
+	/* Create external memory mempool. */
+	ext_mem.elt_size = conf_mbuf_dataroom + RTE_PKTMBUF_HEADROOM;
+	ext_mem.buf_len = RTE_ALIGN_CEIL(conf_nb_mbuf * ext_mem.elt_size, GPU_PAGE_SIZE);
+
+	if (conf_mtype == MEMORY_CPU) {
+		ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0);
+		if (ext_mem.buf_ptr == NULL) {
+			fprintf(stderr, "Could not allocate CPU DPDK memory.\n");
+			goto exit;
+		}
+
+		ret = rte_gpu_mem_register(conf_gpu_id, ext_mem.buf_len, ext_mem.buf_ptr);
+		if (ret < 0) {
+			fprintf(stderr,
+					"rte_gpu_mem_register CPU memory returned error %d.\n", ret);
+			return -1;
+		}
+	} else {
+		ext_mem.buf_iova = RTE_BAD_IOVA;
+
+		ext_mem.buf_ptr = rte_gpu_mem_alloc(conf_gpu_id, ext_mem.buf_len);
+		if (ext_mem.buf_ptr == NULL) {
+			fprintf(stderr, "Could not allocate GPU device memory.\n");
+			goto exit;
+		}
+
+		ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len,
+				NULL, ext_mem.buf_iova, GPU_PAGE_SIZE);
+		if (ret) {
+			fprintf(stderr, "Unable to register addr 0x%p, ret %d.\n", ext_mem.buf_ptr, ret);
+			goto exit;
+		}
+	}
+
+	/* DMA map the external memory. */
+	ret = rte_dev_dma_map(dev_info.device, ext_mem.buf_ptr,
+			ext_mem.buf_iova, ext_mem.buf_len);
+	if (ret) {
+		fprintf(stderr, "Could not DMA map EXT memory.\n");
+		goto exit;
+	}
+
+	/* Create external memory mempool. */
+	mpool = rte_pktmbuf_pool_create_extbuf("payload_mpool", conf_nb_mbuf,
+			0, 0, ext_mem.elt_size,
+			rte_socket_id(), &ext_mem, 1);
+	if (mpool == NULL) {
+		fprintf(stderr, "Could not create EXT memory mempool.\n");
+		goto exit;
+	}
+
+	/* Queues configuration. */
+	ret = rte_eth_dev_configure(conf_port_id, conf_queues,
+			conf_queues, &port_conf);
+	if (ret < 0) {
+		fprintf(stderr,
+				"Cannot configure device: err=%d, port=%u queues=%u\n",
+				ret, conf_port_id, conf_queues);
+		goto exit;
+	}
+
+	ret = rte_eth_dev_adjust_nb_rx_tx_desc(conf_port_id,
+			&conf_nb_descriptors, &conf_nb_descriptors);
+	if (ret) {
+		fprintf(stderr,
+				"Cannot adjust number of descriptors: err=%d, port=%u\n",
+				ret, conf_port_id);
+		goto exit;
+	}
+
+	for (idx_q = 0; idx_q < conf_queues; idx_q++) {
+
+		ret = rte_eth_rx_queue_setup(conf_port_id, idx_q,
+				conf_nb_descriptors, rte_lcore_to_socket_id(idx_q),
+				NULL, mpool);
+
+		if (ret) {
+			fprintf(stderr, "rte_eth_rx_queue_setup: err=%d, port=%u\n",
+					ret, conf_port_id);
+			goto exit;
+		}
+
+		ret = rte_eth_tx_queue_setup(conf_port_id, idx_q,
+				conf_nb_descriptors, rte_lcore_to_socket_id(idx_q), NULL);
+		if (ret) {
+			fprintf(stderr, "rte_eth_tx_queue_setup: err=%d, port=%u\n",
+					ret, conf_port_id);
+			goto exit;
+		}
+	}
+
+	rte_eth_macaddr_get(conf_port_id, &port_eth_addr);
+
+	ret = rte_eth_dev_start(conf_port_id);
+	if (ret) {
+		fprintf(stderr, "rte_eth_dev_start: err=%d, port=%u\n",
+				ret, conf_port_id);
+			goto exit;
+	}
+
+	printf("Port %d: %02x:%02x:%02x:%02x:%02x:%02x started!\n",
+				conf_port_id,
+				(uint8_t)port_eth_addr.addr_bytes[0],
+				(uint8_t)port_eth_addr.addr_bytes[1],
+				port_eth_addr.addr_bytes[2],
+				port_eth_addr.addr_bytes[3],
+				port_eth_addr.addr_bytes[4],
+				port_eth_addr.addr_bytes[5]);
+
+	rte_eth_promiscuous_enable(conf_port_id);
+
+	/* Create communication lists, one per queue. */
+	for (idx_q = 0; idx_q < MAX_QUEUES; idx_q++) {
+		comm_list_fwd[idx_q] = NULL;
+
+		if (idx_q < conf_queues) {
+			comm_list_fwd[idx_q] = rte_gpu_comm_create_list(conf_gpu_id,
+					NUM_COMM_ITEMS);
+			if (comm_list_fwd[idx_q] == NULL) {
+				fprintf(stderr, "comm_create_list returned error %d\n",
+						ret);
+				goto exit;
+			}
+			ret = rte_gpu_comm_cleanup_list(&(comm_list_fwd[idx_q][0]));
+			if (ret < 0) {
+				fprintf(stderr, "comm_cleanup_list returned error %d\n",
+						ret);
+				goto exit;
+			}
+		}
+	}
+
+	core_id = 0;
+	for (idx_q = 0; idx_q < conf_queues; idx_q++) {
+		core_id = rte_get_next_lcore(core_id, 1, 0);
+		rte_eal_remote_launch(tx_core, NULL, core_id);
+
+		core_id = rte_get_next_lcore(core_id, 1, 0);
+		rte_eal_remote_launch(rx_core, NULL, core_id);
+	}
+
+	core_id = 0;
+	RTE_LCORE_FOREACH_WORKER(core_id) {
+		if (rte_eal_wait_lcore(core_id) < 0) {
+			fprintf(stderr, "bad exit for core %d.\n",
+					core_id);
+			break;
+		}
+	}
+
+	force_quit = true;
+
+	ret = rte_dev_dma_unmap(dev_info.device, (void *)ext_mem.buf_ptr,
+			RTE_BAD_IOVA, ext_mem.buf_len);
+	if (ret) {
+		fprintf(stderr,
+				"rte_dev_dma_unmap 0x%p -> %d (rte_errno = %d)\n",
+				(uint8_t *)ext_mem.buf_ptr, ret, rte_errno);
+		goto exit;
+	}
+
+	if (conf_mtype == MEMORY_CPU) {
+		ret = rte_gpu_mem_unregister(conf_gpu_id, ext_mem.buf_ptr);
+		if (ret < 0) {
+			fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret);
+			goto exit;
+		}
+
+		rte_free(ext_mem.buf_ptr);
+
+	} else {
+
+		ret = rte_extmem_unregister(ext_mem.buf_ptr, ext_mem.buf_len);
+		if (ret) {
+			fprintf(stderr, "rte_extmem_unregister failed with %d.\n", ret);
+			goto exit;
+		}
+
+		rte_gpu_mem_free(conf_gpu_id, (void *)ext_mem.buf_ptr);
+	}
+
+	rte_eth_dev_stop(conf_port_id);
+	rte_eth_dev_close(conf_port_id);
 
+exit:
 	/* clean up the EAL */
 	rte_eal_cleanup();
 
+	printf("Bye...\n");
 	return EXIT_SUCCESS;
 }