@@ -19,6 +19,7 @@ apps = [
'test-cmdline',
'test-compress-perf',
'test-crypto-perf',
+ 'test-dma-perf',
'test-eventdev',
'test-fib',
'test-flow-perf',
new file mode 100644
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <rte_time.h>
+#include <rte_mbuf.h>
+#include <rte_dmadev.h>
+#include <rte_malloc.h>
+#include <rte_lcore.h>
+
+#include "main.h"
+
+#define MAX_DMA_CPL_NB 255
+
+#define TEST_WAIT_U_SECOND 10000
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%s,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n"
+
+struct worker_info {
+ bool ready_flag;
+ bool start_flag;
+ bool stop_flag;
+ uint32_t total_cpl;
+ uint32_t test_cpl;
+};
+
+struct lcore_params {
+ uint8_t scenario_id;
+ unsigned int lcore_id;
+ char *dma_name;
+ uint16_t worker_id;
+ uint16_t dev_id;
+ uint32_t nr_buf;
+ uint16_t kick_batch;
+ uint32_t buf_size;
+ uint16_t test_secs;
+ struct rte_mbuf **srcs;
+ struct rte_mbuf **dsts;
+ struct worker_info worker_info;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+static volatile struct lcore_params *worker_params[MAX_WORKER_NB];
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+ va_list ap;
+ int ret;
+
+ ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+ va_start(ap, format);
+ ret += vfprintf(stderr, format, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+/*
+ * Derive per-worker metrics for one scenario.
+ * memory: MB touched by this worker (src + dst); ave_cycle: timer cycles per
+ * completed copy; bandwidth: Gbps; mops: millions of copies per second.
+ */
+static inline void
+calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, uint16_t test_secs,
+				uint32_t total_cnt, float *memory, uint32_t *ave_cycle,
+				float *bandwidth, float *mops)
+{
+	*memory = (float)(buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024);
+	if (total_cnt == 0) {
+		/* No completions at all: report zeroed stats instead of
+		 * dividing by zero below.
+		 */
+		*ave_cycle = 0;
+		*bandwidth = 0;
+		*mops = 0;
+		return;
+	}
+	*ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;
+	*bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) / 1000000000;
+	*mops = (float)rte_get_timer_hz() / *ave_cycle / 1000000;
+}
+
+/*
+ * Print one worker's results to stdout and stage its CSV line in output_str[].
+ * NOTE(review): output_str[] is indexed by lcore_id here but is sized
+ * MAX_WORKER_NB (128) while lcore IDs can be larger — confirm configured
+ * lcores stay below that bound.
+ */
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, uint64_t ave_cycle,
+			uint32_t buf_size, uint32_t nr_buf, float memory,
+			float bandwidth, float mops, bool is_dma)
+{
+	if (is_dma)
+		printf("lcore %u, DMA %s:\n", lcore_id, dma_name);
+	else
+		printf("lcore %u\n", lcore_id);
+
+	printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, memory: %.2lfMB, frequency: %" PRIu64 ".\n",
+			ave_cycle, buf_size, nr_buf, memory, rte_get_timer_hz());
+	printf("Average bandwidth: %.3lfGbps, MOps: %.3lf\n", bandwidth, mops);
+
+	/* CSV layout matches CSV_LINE_DMA_FMT / CSV_LINE_CPU_FMT (the CPU
+	 * format hard-codes "NA" for the DMA column).
+	 */
+	if (is_dma)
+		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, CSV_LINE_DMA_FMT,
+			scenario_id, lcore_id, dma_name, buf_size,
+			nr_buf, memory, ave_cycle, bandwidth, mops);
+	else
+		snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, CSV_LINE_CPU_FMT,
+			scenario_id, lcore_id, buf_size,
+			nr_buf, memory, ave_cycle, bandwidth, mops);
+}
+
+/*
+ * Evict every buffer from the CPU cache so measurements start cold.
+ * Implemented only for x86-64 via clflush; a no-op on other architectures.
+ */
+static inline void
+cache_flush_buf(__maybe_unused struct rte_mbuf **array,
+		__maybe_unused uint32_t buf_size,
+		__maybe_unused uint32_t nr_buf)
+{
+#ifdef RTE_ARCH_X86_64
+	char *data;
+	struct rte_mbuf **srcs = array;
+	uint32_t i, offset;
+
+	for (i = 0; i < nr_buf; i++) {
+		data = rte_pktmbuf_mtod(srcs[i], char *);
+		/* 64 == x86 cache-line size: flush each line of the buffer. */
+		for (offset = 0; offset < buf_size; offset += 64)
+			__builtin_ia32_clflush(data + offset);
+	}
+#endif
+}
+
+/* Configuration of device: one MEM_TO_MEM vchan, then start it. */
+static void
+configure_dmadev_queue(uint32_t dev_id, uint32_t ring_size)
+{
+	struct rte_dma_info dev_info;
+	const uint16_t vchan_id = 0;
+	struct rte_dma_conf dev_conf = { .nb_vchans = 1 };
+	struct rte_dma_vchan_conf vchan_conf = {
+		.direction = RTE_DMA_DIR_MEM_TO_MEM,
+		.nb_desc = ring_size
+	};
+
+	if (rte_dma_configure(dev_id, &dev_conf) != 0)
+		rte_exit(EXIT_FAILURE, "Error with dma configure.\n");
+
+	if (rte_dma_vchan_setup(dev_id, vchan_id, &vchan_conf) != 0)
+		rte_exit(EXIT_FAILURE, "Error with queue configuration.\n");
+
+	rte_dma_info_get(dev_id, &dev_info);
+	if (dev_info.nb_vchans != 1)
+		rte_exit(EXIT_FAILURE, "Error, no configured queues reported on device id. %u\n", dev_id);
+
+	if (rte_dma_start(dev_id) != 0)
+		rte_exit(EXIT_FAILURE, "Error with dma start.\n");
+}
+
+/*
+ * Look up and configure one DMA device per worker lcore.
+ * Returns 0 when every worker got a device, -1 otherwise.
+ */
+static int
+config_dmadevs(struct test_configure *cfg)
+{
+	uint32_t ring_size = cfg->ring_size.cur;
+	struct lcore_dma_map_t *ldm = &cfg->lcore_dma_map;
+	uint32_t nb_workers = ldm->cnt;
+	uint32_t i;
+	int dev_id;
+	uint16_t nb_dmadevs = 0;
+	char *dma_name;
+
+	for (i = 0; i < ldm->cnt; i++) {
+		dma_name = ldm->dma_names[i];
+		dev_id = rte_dma_get_dev_id_by_name(dma_name);
+		/* The lookup returns a negative errno on failure, not only -1. */
+		if (dev_id < 0) {
+			fprintf(stderr, "Error: Fail to find DMA %s.\n", dma_name);
+			goto end;
+		}
+
+		ldm->dma_ids[i] = dev_id;
+		configure_dmadev_queue(dev_id, ring_size);
+		++nb_dmadevs;
+	}
+
+end:
+	if (nb_dmadevs < nb_workers) {
+		printf("Not enough dmadevs (%u) for all workers (%u).\n", nb_dmadevs, nb_workers);
+		return -1;
+	}
+
+	printf("Number of used dmadevs: %u.\n", nb_dmadevs);
+
+	return 0;
+}
+
+#define POLL_MAX 1000
+
+/*
+ * Worker loop for the DMA path. Repeatedly enqueues src->dst copies on
+ * vchan 0 of its assigned dmadev, kicking the hardware every kick_batch
+ * enqueues, until the main lcore sets stop_flag. total_cpl accumulates the
+ * number of completed copies and is sampled (volatile) by the main lcore.
+ */
+static inline int
+do_dma_mem_copy(void *p)
+{
+	uint16_t *para_idx = (uint16_t *)p;
+	volatile struct lcore_params *para = worker_params[*para_idx];
+	volatile struct worker_info *worker_info = &(para->worker_info);
+	uint16_t dev_id = para->dev_id;
+	uint32_t nr_buf = para->nr_buf;
+	uint16_t kick_batch = para->kick_batch;
+	uint32_t buf_size = para->buf_size;
+	struct rte_mbuf **srcs = para->srcs;
+	struct rte_mbuf **dsts = para->dsts;
+	int64_t async_cnt = 0;
+	int nr_cpl = 0;
+	uint32_t i;
+	uint32_t poll_cnt = 0;
+
+	/* Signal readiness, then spin until the main lcore starts the test. */
+	worker_info->stop_flag = false;
+	worker_info->ready_flag = true;
+
+	while (!worker_info->start_flag)
+		;
+
+	while (1) {
+		for (i = 0; i < nr_buf; i++) {
+			if (unlikely(rte_dma_copy(dev_id,
+						0,
+						rte_pktmbuf_iova(srcs[i]),
+						rte_pktmbuf_iova(dsts[i]),
+						buf_size,
+						0) < 0)) {
+				/* Ring full: kick the device and reap
+				 * completions until space opens, then retry
+				 * the enqueue once.
+				 */
+				rte_dma_submit(dev_id, 0);
+				while (rte_dma_burst_capacity(dev_id, 0) == 0) {
+					nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB,
+								NULL, NULL);
+					async_cnt -= nr_cpl;
+					worker_info->total_cpl += nr_cpl;
+				}
+				if (rte_dma_copy(dev_id,
+						0,
+						rte_pktmbuf_iova(srcs[i]),
+						rte_pktmbuf_iova(dsts[i]),
+						buf_size,
+						0) < 0) {
+					printf("enqueue fail again at %u\n", i);
+					printf("space:%d\n", rte_dma_burst_capacity(dev_id, 0));
+					rte_exit(EXIT_FAILURE, "DMA enqueue failed\n");
+				}
+			}
+			async_cnt++;
+
+			/* Kick the hardware every kick_batch enqueues. */
+			if ((async_cnt % kick_batch) == 0) {
+				rte_dma_submit(dev_id, 0);
+				/* add a poll to avoid ring full */
+				nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
+				async_cnt -= nr_cpl;
+				worker_info->total_cpl += nr_cpl;
+			}
+		}
+
+		if (worker_info->stop_flag)
+			break;
+	}
+
+	/* Drain outstanding copies; bounded by POLL_MAX so a stalled device
+	 * cannot hang the worker forever.
+	 */
+	rte_dma_submit(dev_id, 0);
+	while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) {
+		nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
+		async_cnt -= nr_cpl;
+	}
+
+	return 0;
+}
+
+/*
+ * Worker loop for the CPU path: memcpy each src buffer to its dst buffer
+ * until the main lcore sets stop_flag, counting completed copies.
+ *
+ * Fix: the original cast rte_mbuf_data_iova() (an IO address) to a CPU
+ * pointer, which only happens to work when IOVA == VA; with IOVA as PA it
+ * dereferences a physical address. Use the mbuf's virtual data address.
+ */
+static inline int
+do_cpu_mem_copy(void *p)
+{
+	uint16_t *para_idx = (uint16_t *)p;
+	volatile struct lcore_params *para = worker_params[*para_idx];
+	volatile struct worker_info *worker_info = &(para->worker_info);
+	uint32_t nr_buf = para->nr_buf;
+	uint32_t buf_size = para->buf_size;
+	struct rte_mbuf **srcs = para->srcs;
+	struct rte_mbuf **dsts = para->dsts;
+	uint32_t i;
+
+	/* Signal readiness, then spin until the main lcore starts the test. */
+	worker_info->stop_flag = false;
+	worker_info->ready_flag = true;
+
+	while (!worker_info->start_flag)
+		;
+
+	while (1) {
+		for (i = 0; i < nr_buf; i++) {
+			/* copy buffer form src to dst */
+			rte_memcpy(rte_pktmbuf_mtod(dsts[i], void *),
+				rte_pktmbuf_mtod(srcs[i], void *),
+				(size_t)buf_size);
+			worker_info->total_cpl++;
+		}
+		if (worker_info->stop_flag)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the source/destination mempools and mbuf pointer arrays for one
+ * scenario. Returns 0 on success, -1 on failure; resources already created
+ * before a failure are released by the caller's cleanup path.
+ */
+static int
+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs,
+			struct rte_mbuf ***dsts)
+{
+	unsigned int buf_size = cfg->buf_size.cur;
+	unsigned int nr_sockets;
+	uint32_t nr_buf = cfg->nr_buf;
+
+	nr_sockets = rte_socket_count();
+	if (cfg->src_numa_node >= nr_sockets ||
+		cfg->dst_numa_node >= nr_sockets) {
+		/* Fix: "acture" -> "actual" in the user-facing message. */
+		printf("Error: Source or destination numa exceeds the actual numa nodes.\n");
+		return -1;
+	}
+
+	src_pool = rte_pktmbuf_pool_create("Benchmark_DMA_SRC",
+			nr_buf, /* n == num elements */
+			64,  /* cache size */
+			0,   /* priv size */
+			buf_size + RTE_PKTMBUF_HEADROOM,
+			cfg->src_numa_node);
+	if (src_pool == NULL) {
+		PRINT_ERR("Error with source mempool creation.\n");
+		return -1;
+	}
+
+	dst_pool = rte_pktmbuf_pool_create("Benchmark_DMA_DST",
+			nr_buf, /* n == num elements */
+			64,  /* cache size */
+			0,   /* priv size */
+			buf_size + RTE_PKTMBUF_HEADROOM,
+			cfg->dst_numa_node);
+	if (dst_pool == NULL) {
+		PRINT_ERR("Error with destination mempool creation.\n");
+		return -1;
+	}
+
+	*srcs = rte_malloc(NULL, nr_buf * sizeof(struct rte_mbuf *), 0);
+	if (*srcs == NULL) {
+		printf("Error: srcs malloc failed.\n");
+		return -1;
+	}
+
+	*dsts = rte_malloc(NULL, nr_buf * sizeof(struct rte_mbuf *), 0);
+	if (*dsts == NULL) {
+		printf("Error: dsts malloc failed.\n");
+		return -1;
+	}
+
+	if (rte_mempool_get_bulk(src_pool, (void **)*srcs, nr_buf) != 0) {
+		printf("get src mbufs failed.\n");
+		return -1;
+	}
+	if (rte_mempool_get_bulk(dst_pool, (void **)*dsts, nr_buf) != 0) {
+		printf("get dst mbufs failed.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Run one benchmark scenario: set up buffers (and dmadevs for the DMA path),
+ * launch one worker per configured lcore, let them run for test_secs, then
+ * collect and output per-worker results.
+ *
+ * Fixes vs. the original:
+ *  - workers are launched with a pointer to their own persistent worker_id,
+ *    not to the loop variable "i" which the main lcore keeps mutating;
+ *  - worker_params is zero-allocated so worker_info flags/counters start at
+ *    a defined value (rte_malloc does not zero memory).
+ */
+void
+mem_copy_benchmark(struct test_configure *cfg, bool is_dma)
+{
+	uint16_t i;
+	uint32_t offset;
+	unsigned int lcore_id = 0;
+	struct rte_mbuf **srcs = NULL, **dsts = NULL;
+	struct lcore_dma_map_t *ldm = &cfg->lcore_dma_map;
+	unsigned int buf_size = cfg->buf_size.cur;
+	uint16_t kick_batch = cfg->kick_batch.cur;
+	uint32_t nr_buf = cfg->nr_buf = (cfg->mem_size.cur * 1024 * 1024) / (cfg->buf_size.cur * 2);
+	uint16_t nb_workers = ldm->cnt;
+	uint16_t test_secs = cfg->test_secs;
+	float memory;
+	uint32_t avg_cycles = 0;
+	float mops;
+	float bandwidth;
+
+	if (setup_memory_env(cfg, &srcs, &dsts) < 0)
+		goto out;
+
+	if (is_dma)
+		if (config_dmadevs(cfg) < 0)
+			goto out;
+
+	if (cfg->cache_flush) {
+		cache_flush_buf(srcs, buf_size, nr_buf);
+		cache_flush_buf(dsts, buf_size, nr_buf);
+		rte_mb();
+	}
+
+	printf("Start testing....\n");
+
+	for (i = 0; i < nb_workers; i++) {
+		lcore_id = ldm->lcores[i];
+		offset = nr_buf / nb_workers * i;
+
+		/* Zero-allocated: workers spin on start_flag and accumulate
+		 * total_cpl, so these fields must start at 0/false.
+		 */
+		worker_params[i] = rte_zmalloc(NULL, sizeof(struct lcore_params), 0);
+		if (!worker_params[i]) {
+			printf("lcore parameters malloc failure for lcore %d\n", lcore_id);
+			break;
+		}
+		if (is_dma) {
+			worker_params[i]->dma_name = ldm->dma_names[i];
+			worker_params[i]->dev_id = ldm->dma_ids[i];
+			worker_params[i]->kick_batch = kick_batch;
+		}
+		worker_params[i]->worker_id = i;
+		worker_params[i]->nr_buf = (uint32_t)(nr_buf / nb_workers);
+		worker_params[i]->buf_size = buf_size;
+		worker_params[i]->test_secs = test_secs;
+		worker_params[i]->srcs = srcs + offset;
+		worker_params[i]->dsts = dsts + offset;
+		worker_params[i]->scenario_id = cfg->scenario_id;
+		worker_params[i]->lcore_id = lcore_id;
+
+		/* Pass the worker's own persistent index, not &i: the launched
+		 * lcore may dereference the pointer after this loop has
+		 * already advanced i.
+		 */
+		if (is_dma)
+			rte_eal_remote_launch(do_dma_mem_copy,
+				(void *)(&worker_params[i]->worker_id), lcore_id);
+		else
+			rte_eal_remote_launch(do_cpu_mem_copy,
+				(void *)(&worker_params[i]->worker_id), lcore_id);
+	}
+
+	/* Wait until every worker has signalled readiness. */
+	while (1) {
+		bool ready = true;
+		for (i = 0; i < nb_workers; i++) {
+			if (worker_params[i]->worker_info.ready_flag == false) {
+				ready = 0;
+				break;
+			}
+		}
+		if (ready)
+			break;
+	}
+
+	for (i = 0; i < nb_workers; i++)
+		worker_params[i]->worker_info.start_flag = true;
+
+	/* Warm-up window, then snapshot the completion counters. */
+	usleep(TEST_WAIT_U_SECOND);
+	for (i = 0; i < nb_workers; i++)
+		worker_params[i]->worker_info.test_cpl = worker_params[i]->worker_info.total_cpl;
+
+	/* Measurement window: test_cpl becomes the delta over test_secs. */
+	usleep(test_secs * 1000 * 1000);
+	for (i = 0; i < nb_workers; i++)
+		worker_params[i]->worker_info.test_cpl = worker_params[i]->worker_info.total_cpl -
+						worker_params[i]->worker_info.test_cpl;
+
+	for (i = 0; i < nb_workers; i++)
+		worker_params[i]->worker_info.stop_flag = true;
+
+	rte_eal_mp_wait_lcore();
+
+	for (i = 0; i < nb_workers; i++) {
+		calc_result(buf_size, nr_buf, nb_workers, test_secs,
+			worker_params[i]->worker_info.test_cpl,
+			&memory, &avg_cycles, &bandwidth, &mops);
+		output_result(cfg->scenario_id, worker_params[i]->lcore_id,
+					worker_params[i]->dma_name, avg_cycles, buf_size,
+					nr_buf / nb_workers, memory, bandwidth, mops, is_dma);
+	}
+
+out:
+	/* free env */
+	if (srcs)
+		rte_pktmbuf_free_bulk(srcs, nr_buf);
+	if (dsts)
+		rte_pktmbuf_free_bulk(dsts, nr_buf);
+
+	if (src_pool)
+		rte_mempool_free(src_pool);
+	if (dst_pool)
+		rte_mempool_free(dst_pool);
+
+	if (is_dma) {
+		for (i = 0; i < nb_workers; i++) {
+			printf("Stopping dmadev %d\n", ldm->dma_ids[i]);
+			rte_dma_stop(ldm->dma_ids[i]);
+		}
+	}
+}
new file mode 100644
@@ -0,0 +1,59 @@
+
+; This is an example configuration file for dma-perf, which details the meanings of each parameter
+; and instructions on how to use dma-perf.
+
+; Supported test types are DMA_MEM_COPY and CPU_MEM_COPY.
+
+; Parameters:
+; "mem_size" denotes the size of the memory footprint.
+; "buf_size" denotes the memory size of a single operation.
+; "dma_ring_size" denotes the dma ring buffer size. It should be greater than 64 normally.
+; "kick_batch" denotes the dma operation batch size, and should be greater than 1 normally.
+
+; The format for variables is variable=first[,last,increment[,ADD|MUL]]. ADD is the default mode.
+
+; src_numa_node is used to control the numa node where the source memory is allocated.
+; dst_numa_node is used to control the numa node where the destination memory is allocated.
+
+; cache_flush is used to determine whether or not the cache should be flushed, with 1 indicating to
+; flush and 0 indicating to not flush.
+
+; test_seconds controls the test time of the whole case.
+
+; To use DMA for a test, please specify the "lcore_dma" parameter.
+; If you have already set the "-l" and "-a" parameters using EAL,
+; make sure that the value of "lcore_dma" falls within their range of values.
+
+; To use CPU for a test, please specify the "lcore" parameter.
+; If you have already set the "-l" and "-a" parameters using EAL,
+; make sure that the value of "lcore" falls within their range of values.
+
+; To specify a configuration file, use the "--config" flag followed by the path to the file.
+
+; To specify a result file, use the "--result" flag followed by the path to the file.
+; If you do not specify a result file, one will be generated with the same name as the configuration
+; file, with the addition of "_result.csv" at the end.
+
+[case1]
+type=DMA_MEM_COPY
+mem_size=10
+buf_size=64,8192,2,MUL
+dma_ring_size=1024
+kick_batch=32
+src_numa_node=0
+dst_numa_node=0
+cache_flush=0
+test_seconds=2
+lcore_dma=lcore10@0000:00:04.2, lcore11@0000:00:04.3
+eal_args=--in-memory --file-prefix=test
+
+[case2]
+type=CPU_MEM_COPY
+mem_size=10
+buf_size=64,8192,2,MUL
+src_numa_node=0
+dst_numa_node=1
+cache_flush=0
+test_seconds=2
+lcore = 3, 4
+eal_args=--in-memory --no-pci
new file mode 100644
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <libgen.h>
+
+#include <rte_eal.h>
+#include <rte_cfgfile.h>
+#include <rte_string_fns.h>
+#include <rte_lcore.h>
+
+#include "main.h"
+
+#define CSV_HDR_FMT "Case %u : %s,lcore,DMA,buffer size,nr_buf,memory(MB),cycle,bandwidth(Gbps),MOps\n"
+
+#define MAX_EAL_PARAM_NB 100
+#define MAX_EAL_PARAM_LEN 1024
+
+#define DMA_MEM_COPY "DMA_MEM_COPY"
+#define CPU_MEM_COPY "CPU_MEM_COPY"
+
+#define CMDLINE_CONFIG_ARG "--config"
+#define CMDLINE_RESULT_ARG "--result"
+
+#define MAX_PARAMS_PER_ENTRY 4
+
+#define MAX_LONG_OPT_SZ 64
+
+enum {
+ TEST_TYPE_NONE = 0,
+ TEST_TYPE_DMA_MEM_COPY,
+ TEST_TYPE_CPU_MEM_COPY
+};
+
+#define MAX_TEST_CASES 16
+static struct test_configure test_cases[MAX_TEST_CASES];
+
+char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];
+
+static FILE *fd;
+
+/* Flush every staged output_str[] line to the CSV file and clear it. */
+static void
+output_csv(bool need_blankline)
+{
+	uint32_t idx;
+
+	if (need_blankline) {
+		fprintf(fd, ",,,,,,,,\n");
+		fprintf(fd, ",,,,,,,,\n");
+	}
+
+	for (idx = 0; idx < RTE_DIM(output_str); idx++) {
+		if (output_str[idx][0] == '\0')
+			continue;
+		fprintf(fd, "%s", output_str[idx]);
+		output_str[idx][0] = '\0';
+	}
+
+	fflush(fd);
+}
+
+/* Emit a small "test environment" section (CPU timer frequency) to the CSV. */
+static void
+output_env_info(void)
+{
+	snprintf(output_str[0], MAX_OUTPUT_STR_LEN, "test environment:\n");
+	snprintf(output_str[1], MAX_OUTPUT_STR_LEN, "CPU frequency,%" PRIu64 "\n", rte_get_timer_hz());
+
+	output_csv(true);
+}
+
+/* Emit the per-case CSV header row via the shared output_str buffers. */
+static void
+output_header(uint32_t case_id, struct test_configure *case_cfg)
+{
+	snprintf(output_str[0], MAX_OUTPUT_STR_LEN,
+			CSV_HDR_FMT, case_id, case_cfg->test_type_str);
+
+	output_csv(true);
+}
+
+/* Dispatch one scenario run to the DMA or CPU benchmark path. */
+static void
+run_test_case(struct test_configure *case_cfg)
+{
+	if (case_cfg->test_type == TEST_TYPE_DMA_MEM_COPY)
+		mem_copy_benchmark(case_cfg, true);
+	else if (case_cfg->test_type == TEST_TYPE_CPU_MEM_COPY)
+		mem_copy_benchmark(case_cfg, false);
+	else
+		printf("Unknown test type. %s\n", case_cfg->test_type_str);
+}
+
+/*
+ * Run one test case: pick its single variable parameter (mem_size, buf_size,
+ * ring_size or kick_batch) and sweep it from first to last, running one
+ * scenario per value.
+ */
+static void
+run_test(uint32_t case_id, struct test_configure *case_cfg)
+{
+	uint32_t i;
+	uint32_t nb_lcores = rte_lcore_count();
+	struct test_configure_entry *mem_size = &case_cfg->mem_size;
+	struct test_configure_entry *buf_size = &case_cfg->buf_size;
+	struct test_configure_entry *ring_size = &case_cfg->ring_size;
+	struct test_configure_entry *kick_batch = &case_cfg->kick_batch;
+	struct test_configure_entry dummy = { 0 };
+	struct test_configure_entry *var_entry = &dummy;
+
+
+
+	for (i = 0; i < RTE_DIM(output_str); i++)
+		memset(output_str[i], 0, MAX_OUTPUT_STR_LEN);
+
+	/* One lcore is the main lcore; the workers need the rest. */
+	if (nb_lcores <= case_cfg->lcore_dma_map.cnt) {
+		printf("Case %u: Not enough lcores.\n", case_id);
+		return;
+	}
+
+	printf("Number of used lcores: %u.\n", nb_lcores);
+
+	/* load_configs() rejects cases with more than one variable entry;
+	 * should several slip through, the last one checked here wins.
+	 */
+	if (mem_size->incr != 0)
+		var_entry = mem_size;
+
+	if (buf_size->incr != 0)
+		var_entry = buf_size;
+
+	if (ring_size->incr != 0)
+		var_entry = ring_size;
+
+	if (kick_batch->incr != 0)
+		var_entry = kick_batch;
+
+	case_cfg->scenario_id = 0;
+
+	output_header(case_id, case_cfg);
+
+	/* Sweep the variable parameter by ADD or MUL steps; a fixed entry
+	 * (OP_NONE) runs exactly one scenario.
+	 */
+	for (var_entry->cur = var_entry->first; var_entry->cur <= var_entry->last;) {
+		case_cfg->scenario_id++;
+		printf("\nRunning scenario %d\n", case_cfg->scenario_id);
+
+		run_test_case(case_cfg);
+		output_csv(false);
+
+		if (var_entry->op == OP_MUL)
+			var_entry->cur *= var_entry->incr;
+		else if (var_entry->op == OP_ADD)
+			var_entry->cur += var_entry->incr;
+		else
+			break;
+	}
+
+}
+
+/*
+ * Parse the "lcore" entry: a comma/space separated list of lcore IDs for
+ * CPU-copy workers. Returns 0 on success, -1 on bad input.
+ *
+ * Fixes vs. the original: "value" was dereferenced (strlen/strcpy) before
+ * its NULL check, malloc() was unchecked, and the bound check used
+ * MAX_LCORE_NB (256) although lcores[] only holds MAX_WORKER_NB (128)
+ * entries, allowing an array overflow.
+ */
+static int
+parse_lcore(struct test_configure *test_case, const char *value)
+{
+	struct lcore_dma_map_t *lcore_dma_map;
+	char *input, *token;
+
+	if (test_case == NULL || value == NULL)
+		return -1;
+
+	input = strdup(value);
+	if (input == NULL)
+		return -1;
+
+	lcore_dma_map = &test_case->lcore_dma_map;
+	memset(lcore_dma_map, 0, sizeof(struct lcore_dma_map_t));
+
+	token = strtok(input, ", ");
+	while (token != NULL) {
+		/* lcores[] only holds MAX_WORKER_NB entries. */
+		if (lcore_dma_map->cnt >= MAX_WORKER_NB) {
+			free(input);
+			return -1;
+		}
+
+		lcore_dma_map->lcores[lcore_dma_map->cnt++] = (uint16_t)atoi(token);
+		token = strtok(NULL, ", ");
+	}
+
+	free(input);
+	return 0;
+}
+
+/*
+ * Parse the "lcore_dma" entry: comma-separated "lcoreX@dma-dev-addr" pairs.
+ * Returns 0 on success, -1 on bad input.
+ *
+ * Fixes vs. the original: "while (*addrs == '\0') addrs++;" walked past the
+ * end of an empty string (UB) — skip spaces instead; the rte_strsplit()
+ * return was ignored, leaving ptrs[1] uninitialized when '@' was missing;
+ * the DMA name strcpy was unbounded; cnt had no bound check; value/strdup
+ * were not NULL-checked.
+ */
+static int
+parse_lcore_dma(struct test_configure *test_case, const char *value)
+{
+	struct lcore_dma_map_t *lcore_dma_map;
+	char *input, *addrs;
+	char *ptrs[2];
+	char *start, *end, *substr;
+	uint16_t lcore_id;
+	int ret = 0;
+
+	if (test_case == NULL || value == NULL)
+		return -1;
+
+	input = strdup(value);
+	if (input == NULL)
+		return -1;
+	addrs = input;
+
+	/* Skip leading whitespace before the first pair. */
+	while (*addrs == ' ')
+		addrs++;
+	if (*addrs == '\0') {
+		fprintf(stderr, "No input DMA addresses\n");
+		ret = -1;
+		goto out;
+	}
+
+	substr = strtok(addrs, ",");
+	if (substr == NULL) {
+		fprintf(stderr, "No input DMA address\n");
+		ret = -1;
+		goto out;
+	}
+
+	memset(&test_case->lcore_dma_map, 0, sizeof(struct lcore_dma_map_t));
+	lcore_dma_map = &test_case->lcore_dma_map;
+
+	do {
+		if (lcore_dma_map->cnt >= MAX_WORKER_NB) {
+			fprintf(stderr, "Too many lcore@DMA pairs\n");
+			ret = -1;
+			break;
+		}
+
+		/* Each pair must split into exactly "lcoreX" and the device. */
+		if (rte_strsplit(substr, strlen(substr), ptrs, 2, '@') != 2) {
+			fprintf(stderr, "Illegal DMA address\n");
+			ret = -1;
+			break;
+		}
+
+		start = strstr(ptrs[0], "lcore");
+		if (start == NULL) {
+			fprintf(stderr, "Illegal lcore\n");
+			ret = -1;
+			break;
+		}
+
+		start += 5;	/* skip the "lcore" prefix */
+		lcore_id = strtol(start, &end, 0);
+		if (end == start) {
+			fprintf(stderr, "No input lcore ID or ID %d is wrong\n", lcore_id);
+			ret = -1;
+			break;
+		}
+
+		lcore_dma_map->lcores[lcore_dma_map->cnt] = lcore_id;
+		strlcpy(lcore_dma_map->dma_names[lcore_dma_map->cnt], ptrs[1],
+			RTE_DEV_NAME_MAX_LEN);
+		lcore_dma_map->cnt++;
+		substr = strtok(NULL, ",");
+	} while (substr != NULL);
+
+out:
+	free(input);
+	return ret;
+}
+
+/*
+ * Parse a "first[,last,incr[,ADD|MUL]]" config entry into *entry.
+ * Returns the number of comma-separated fields parsed (1..4), or -1 on
+ * NULL/empty input or an unknown op keyword.
+ */
+static int
+parse_entry(const char *value, struct test_configure_entry *entry)
+{
+	char input[255] = {0};
+	char *args[MAX_PARAMS_PER_ENTRY];
+	int args_nr = -1;
+
+	if (value == NULL || entry == NULL)
+		goto out;
+
+	strncpy(input, value, 254);
+	if (*input == '\0')
+		goto out;
+
+	args_nr = rte_strsplit(input, strlen(input), args, MAX_PARAMS_PER_ENTRY, ',');
+	if (args_nr <= 0)
+		goto out;
+
+	/* NOTE(review): atoi() silently yields 0 on malformed numbers —
+	 * acceptable only if the config file is trusted; confirm.
+	 */
+	entry->cur = entry->first = (uint32_t)atoi(args[0]);
+	entry->last = args_nr > 1 ? (uint32_t)atoi(args[1]) : 0;
+	entry->incr = args_nr > 2 ? (uint32_t)atoi(args[2]) : 0;
+
+	if (args_nr > 3) {
+		if (!strcmp(args[3], "MUL"))
+			entry->op = OP_MUL;
+		else if (!strcmp(args[3], "ADD"))
+			entry->op = OP_ADD;
+		else {
+			printf("Invalid op %s", args[3]);
+			args_nr = -1;
+		}
+	} else
+		entry->op = OP_NONE;
+out:
+	return args_nr;
+}
+
+/*
+ * Load every [caseN] section of the INI file at "path" into test_cases[],
+ * marking each section valid or not. Returns the number of sections seen.
+ *
+ * Fix vs. the original: rte_cfgfile_get_entry() returns NULL for a missing
+ * key, and the numeric entries were fed straight into atoi(NULL), which
+ * crashes; every mandatory entry is now validated first.
+ */
+static uint16_t
+load_configs(const char *path)
+{
+	struct rte_cfgfile *cfgfile;
+	int nb_sections, i;
+	struct test_configure *test_case;
+	char section_name[CFG_NAME_LEN];
+	const char *case_type;
+	const char *lcore_dma;
+	const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str;
+	const char *src_numa_str, *dst_numa_str, *cache_flush_str, *test_secs_str;
+	int args_nr, nb_vp;
+	bool is_dma;
+
+	printf("config file parsing...\n");
+	cfgfile = rte_cfgfile_load(path, 0);
+	if (!cfgfile) {
+		printf("Open configure file error.\n");
+		exit(1);
+	}
+
+	nb_sections = rte_cfgfile_num_sections(cfgfile, NULL, 0);
+	if (nb_sections > MAX_TEST_CASES) {
+		printf("Error: The maximum number of cases is %d.\n", MAX_TEST_CASES);
+		exit(1);
+	}
+
+	for (i = 0; i < nb_sections; i++) {
+		snprintf(section_name, CFG_NAME_LEN, "case%d", i + 1);
+		test_case = &test_cases[i];
+		case_type = rte_cfgfile_get_entry(cfgfile, section_name, "type");
+		if (!case_type) {
+			printf("Error: No case type in case %d, the test will be finished here.\n", i + 1);
+			test_case->is_valid = false;
+			continue;
+		}
+
+		if (strcmp(case_type, DMA_MEM_COPY) == 0) {
+			test_case->test_type = TEST_TYPE_DMA_MEM_COPY;
+			test_case->test_type_str = DMA_MEM_COPY;
+			is_dma = true;
+		} else if (strcmp(case_type, CPU_MEM_COPY) == 0) {
+			test_case->test_type = TEST_TYPE_CPU_MEM_COPY;
+			test_case->test_type_str = CPU_MEM_COPY;
+			is_dma = false;
+		} else {
+			printf("Error: Cannot find case type %s in case%d.\n", case_type, i + 1);
+			test_case->is_valid = false;
+			continue;
+		}
+
+		nb_vp = 0;
+
+		src_numa_str = rte_cfgfile_get_entry(cfgfile, section_name, "src_numa_node");
+		dst_numa_str = rte_cfgfile_get_entry(cfgfile, section_name, "dst_numa_node");
+		if (src_numa_str == NULL || dst_numa_str == NULL) {
+			printf("No src_numa_node or dst_numa_node in case %d.\n", i + 1);
+			test_case->is_valid = false;
+			continue;
+		}
+		test_case->src_numa_node = (int)atoi(src_numa_str);
+		test_case->dst_numa_node = (int)atoi(dst_numa_str);
+
+		/* parse_entry() tolerates a NULL value (returns -1). */
+		mem_size_str = rte_cfgfile_get_entry(cfgfile, section_name, "mem_size");
+		args_nr = parse_entry(mem_size_str, &test_case->mem_size);
+		if (args_nr < 0) {
+			printf("parse error in case %d.\n", i + 1);
+			test_case->is_valid = false;
+			continue;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		buf_size_str = rte_cfgfile_get_entry(cfgfile, section_name, "buf_size");
+		args_nr = parse_entry(buf_size_str, &test_case->buf_size);
+		if (args_nr < 0) {
+			printf("parse error in case %d.\n", i + 1);
+			test_case->is_valid = false;
+			continue;
+		} else if (args_nr > 1)
+			nb_vp++;
+
+		if (is_dma) {
+			ring_size_str = rte_cfgfile_get_entry(cfgfile, section_name,
+								"dma_ring_size");
+			args_nr = parse_entry(ring_size_str, &test_case->ring_size);
+			if (args_nr < 0) {
+				printf("parse error in case %d.\n", i + 1);
+				test_case->is_valid = false;
+				continue;
+			} else if (args_nr > 1)
+				nb_vp++;
+
+			kick_batch_str = rte_cfgfile_get_entry(cfgfile, section_name, "kick_batch");
+			args_nr = parse_entry(kick_batch_str, &test_case->kick_batch);
+			if (args_nr < 0) {
+				printf("parse error in case %d.\n", i + 1);
+				test_case->is_valid = false;
+				continue;
+			} else if (args_nr > 1)
+				nb_vp++;
+
+			lcore_dma = rte_cfgfile_get_entry(cfgfile, section_name, "lcore_dma");
+			int lcore_ret = parse_lcore_dma(test_case, lcore_dma);
+			if (lcore_ret < 0) {
+				printf("parse lcore dma error in case %d.\n", i + 1);
+				test_case->is_valid = false;
+				continue;
+			}
+		} else {
+			lcore_dma = rte_cfgfile_get_entry(cfgfile, section_name, "lcore");
+			int lcore_ret = parse_lcore(test_case, lcore_dma);
+			if (lcore_ret < 0) {
+				printf("parse lcore error in case %d.\n", i + 1);
+				test_case->is_valid = false;
+				continue;
+			}
+		}
+
+		if (nb_vp > 1) {
+			printf("Error, each section can only have a single variable parameter.\n");
+			test_case->is_valid = false;
+			continue;
+		}
+
+		cache_flush_str = rte_cfgfile_get_entry(cfgfile, section_name, "cache_flush");
+		test_secs_str = rte_cfgfile_get_entry(cfgfile, section_name, "test_seconds");
+		if (cache_flush_str == NULL || test_secs_str == NULL) {
+			printf("No cache_flush or test_seconds in case %d.\n", i + 1);
+			test_case->is_valid = false;
+			continue;
+		}
+		test_case->cache_flush = (int)atoi(cache_flush_str);
+		test_case->test_secs = (uint16_t)atoi(test_secs_str);
+
+		test_case->eal_args = rte_cfgfile_get_entry(cfgfile, section_name, "eal_args");
+		test_case->is_valid = true;
+	}
+
+	rte_cfgfile_close(cfgfile);
+	printf("config file parsing complete.\n\n");
+	return i;
+}
+
+/*
+ * Build the child's EAL argv: copy the original argv minus --config/--result
+ * (and their values), then append the tokens of the per-case "eal_args"
+ * string. new_argv[] entries are caller-provided buffers of
+ * MAX_EAL_PARAM_LEN bytes each (see main()). Returns the new argc.
+ *
+ * Fix vs. the original: sizeof(new_argv[new_argc]) is the size of a char *
+ * (8 bytes), which silently truncated every copied argument to 7 characters;
+ * the appended tokens were also copied with an unbounded strcpy().
+ */
+static int
+append_eal_args(int argc, char **argv, const char *eal_args, char **new_argv)
+{
+	int i;
+	char *tokens[MAX_EAL_PARAM_NB];
+	char args[MAX_EAL_PARAM_LEN] = {0};
+	int token_nb, new_argc = 0;
+
+	for (i = 0; i < argc; i++) {
+		if ((strcmp(argv[i], CMDLINE_CONFIG_ARG) == 0) ||
+				(strcmp(argv[i], CMDLINE_RESULT_ARG) == 0)) {
+			i++;	/* also skip the option's value */
+			continue;
+		}
+		strlcpy(new_argv[new_argc], argv[i], MAX_EAL_PARAM_LEN);
+		new_argc++;
+	}
+
+	if (eal_args) {
+		strlcpy(args, eal_args, sizeof(args));
+		token_nb = rte_strsplit(args, strlen(args),
+					tokens, MAX_EAL_PARAM_NB, ' ');
+		for (i = 0; i < token_nb; i++)
+			strlcpy(new_argv[new_argc++], tokens[i], MAX_EAL_PARAM_LEN);
+	}
+
+	return new_argc;
+}
+
+/*
+ * Entry point. Parses --config/--result, loads every [caseN] section, and
+ * forks one child per test case so each run gets a fresh EAL instance
+ * initialized with the case's own "eal_args". The parent only reaps children
+ * and reports how each one terminated.
+ */
+int
+main(int argc, char *argv[])
+{
+	int ret;
+	uint16_t case_nb;
+	uint32_t i, nb_lcores;
+	pid_t cpid, wpid;
+	int wstatus;
+	char args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN];
+	char *pargs[MAX_EAL_PARAM_NB];
+	char *cfg_path_ptr = NULL;
+	char *rst_path_ptr = NULL;
+	char rst_path[PATH_MAX];
+	int new_argc;
+	bool is_first_case = true;
+
+	memset(args, 0, sizeof(args));
+
+	/* pargs[i] points into the flat args storage for append_eal_args(). */
+	for (i = 0; i < RTE_DIM(pargs); i++)
+		pargs[i] = args[i];
+
+	for (i = 0; i < (uint32_t)argc; i++) {
+		if (strncmp(argv[i], CMDLINE_CONFIG_ARG, MAX_LONG_OPT_SZ) == 0)
+			cfg_path_ptr = argv[i + 1];
+		if (strncmp(argv[i], CMDLINE_RESULT_ARG, MAX_LONG_OPT_SZ) == 0)
+			rst_path_ptr = argv[i + 1];
+	}
+	if (cfg_path_ptr == NULL) {
+		printf("Config file not assigned.\n");
+		return -1;
+	}
+	if (rst_path_ptr == NULL) {
+		/* Default result file: config basename, extension replaced by
+		 * "_result.csv". NOTE(review): assumes the config filename
+		 * contains a '.'; strtok/basename mutate rst_path in place —
+		 * confirm edge cases (e.g. dotless names).
+		 */
+		strlcpy(rst_path, cfg_path_ptr, PATH_MAX);
+		strcat(strtok(basename(rst_path), "."), "_result.csv");
+		rst_path_ptr = rst_path;
+	}
+
+	case_nb = load_configs(cfg_path_ptr);
+	/* Truncate the result file now; children re-open it in append mode. */
+	fd = fopen(rst_path_ptr, "w");
+	if (fd == NULL) {
+		printf("Open output CSV file error.\n");
+		return -1;
+	}
+	fclose(fd);
+
+	for (i = 0; i < case_nb; i++) {
+		if (test_cases[i].test_type == TEST_TYPE_NONE) {
+			printf("No test type in test case %d.\n\n", i + 1);
+			continue;
+		}
+		if (!test_cases[i].is_valid) {
+			printf("Invalid test case %d.\n\n", i + 1);
+			continue;
+		}
+
+		/* One child per case: EAL can only be initialized once per
+		 * process, and each case may carry different eal_args.
+		 */
+		cpid = fork();
+		if (cpid < 0) {
+			printf("Fork case %d failed.\n", i + 1);
+			exit(EXIT_FAILURE);
+		} else if (cpid == 0) {
+			printf("\nRunning case %u\n\n", i + 1);
+
+			new_argc = append_eal_args(argc, argv, test_cases[i].eal_args, pargs);
+			ret = rte_eal_init(new_argc, pargs);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+
+			/* Check lcores. */
+			nb_lcores = rte_lcore_count();
+			if (nb_lcores < 2)
+				rte_exit(EXIT_FAILURE,
+					"There should be at least 2 worker lcores.\n");
+
+			fd = fopen(rst_path_ptr, "a");
+			if (!fd) {
+				printf("Open output CSV file error.\n");
+				return 0;
+			}
+
+			if (is_first_case) {
+				output_env_info();
+				is_first_case = false;
+			}
+			run_test(i + 1, &test_cases[i]);
+
+			/* clean up the EAL */
+			rte_eal_cleanup();
+
+			fclose(fd);
+
+			printf("\nCase %u completed.\n\n", i + 1);
+
+			exit(EXIT_SUCCESS);
+		} else {
+			/* Parent: wait for the child and report its fate. */
+			wpid = waitpid(cpid, &wstatus, 0);
+			if (wpid == -1) {
+				printf("waitpid error.\n");
+				exit(EXIT_FAILURE);
+			}
+
+			if (WIFEXITED(wstatus))
+				printf("Case process exited. status %d\n\n",
+					WEXITSTATUS(wstatus));
+			else if (WIFSIGNALED(wstatus))
+				printf("Case process killed by signal %d\n\n",
+					WTERMSIG(wstatus));
+			else if (WIFSTOPPED(wstatus))
+				printf("Case process stopped by signal %d\n\n",
+					WSTOPSIG(wstatus));
+			else if (WIFCONTINUED(wstatus))
+				printf("Case process continued.\n\n");
+			else
+				printf("Case process unknown terminated.\n\n");
+		}
+	}
+
+	printf("Bye...\n");
+	return 0;
+}
+
new file mode 100644
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+#include <rte_dmadev.h>
+
+#ifndef __maybe_unused
+#define __maybe_unused	__rte_unused
+#endif
+
+/* Upper bound on worker lcores and size of each staged CSV line. */
+#define MAX_WORKER_NB 128
+#define MAX_OUTPUT_STR_LEN 512
+
+#define MAX_DMA_NB 128
+#define MAX_LCORE_NB 256
+
+/* One pre-formatted CSV line per worker, flushed by output_csv(). */
+extern char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN];
+
+/* How a variable config entry advances between scenarios. */
+typedef enum {
+	OP_NONE = 0,
+	OP_ADD,
+	OP_MUL
+} alg_op_type;
+
+/* A "first[,last,incr[,ADD|MUL]]" config entry; cur is the running value. */
+struct test_configure_entry {
+	uint32_t first;
+	uint32_t last;
+	uint32_t incr;
+	alg_op_type op;
+	uint32_t cur;
+};
+
+/* Worker-lcore to DMA-device mapping parsed from "lcore_dma" / "lcore".
+ * NOTE(review): lcores[]/dma_names[] hold MAX_WORKER_NB entries while
+ * MAX_LCORE_NB is larger — bounds checks on cnt must use MAX_WORKER_NB.
+ */
+struct lcore_dma_map_t {
+	uint32_t lcores[MAX_WORKER_NB];
+	char dma_names[MAX_WORKER_NB][RTE_DEV_NAME_MAX_LEN];
+	int16_t dma_ids[MAX_WORKER_NB];
+	uint16_t cnt;
+};
+
+/* One [caseN] section from the config file. */
+struct test_configure {
+	bool is_valid;			/* section parsed successfully */
+	uint8_t test_type;		/* TEST_TYPE_* */
+	const char *test_type_str;
+	uint16_t src_numa_node;
+	uint16_t dst_numa_node;
+	uint16_t opcode;
+	bool is_dma;
+	struct lcore_dma_map_t lcore_dma_map;
+	struct test_configure_entry mem_size;	/* footprint, MB */
+	struct test_configure_entry buf_size;	/* bytes per operation */
+	struct test_configure_entry ring_size;
+	struct test_configure_entry kick_batch;
+	uint32_t cache_flush;
+	uint32_t nr_buf;		/* derived: buffers per direction */
+	uint16_t test_secs;
+	const char *eal_args;
+	uint8_t scenario_id;
+};
+
+/* Run one scenario; is_dma selects the DMA or CPU copy path. */
+void mem_copy_benchmark(struct test_configure *cfg, bool is_dma);
+
+#endif /* _MAIN_H_ */
new file mode 100644
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019-2023 Intel Corporation
+
+# meson file, for building this app as part of a main DPDK build.
+
+if is_windows
+    build = false
+    reason = 'not supported on Windows'
+    subdir_done()
+endif
+
+# dmadev for the copy-engine API, mbuf for the buffers, cfgfile for INI parsing.
+deps += ['dmadev', 'mbuf', 'cfgfile']
+
+sources = files(
+        'main.c',
+        'benchmark.c',
+)