[v3,1/7] app/test-compress-perf: add weak functions for multi-cores test

Message ID 1561566633-12329-2-git-send-email-tjozwiakgm@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series add multiple cores feature to test-compress-perf |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Tomasz Jóźwiak June 26, 2019, 4:30 p.m. UTC
From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>

This patch adds template functions for the multi-core performance
version of compress-perf-tool.

Signed-off-by: Tomasz Jozwiak <tjozwiakgm@gmail.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/Makefile                  |   3 +-
 app/test-compress-perf/comp_perf.h               |  61 +++
 app/test-compress-perf/comp_perf_options.h       |  45 +-
 app/test-compress-perf/comp_perf_options_parse.c |  24 +-
 app/test-compress-perf/comp_perf_test_common.c   | 285 +++++++++++
 app/test-compress-perf/comp_perf_test_common.h   |  41 ++
 app/test-compress-perf/main.c                    | 626 ++++++++++-------------
 app/test-compress-perf/meson.build               |   3 +-
 8 files changed, 686 insertions(+), 402 deletions(-)
 create mode 100644 app/test-compress-perf/comp_perf.h
 create mode 100644 app/test-compress-perf/comp_perf_test_common.c
 create mode 100644 app/test-compress-perf/comp_perf_test_common.h
  

Patch

diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index d20e17e..de74129 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -12,7 +12,6 @@  CFLAGS += -O3
 # all source are stored in SRCS-y
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
-SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_common.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h
new file mode 100644
index 0000000..144ad8a
--- /dev/null
+++ b/app/test-compress-perf/comp_perf.h
@@ -0,0 +1,61 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_
+#define _COMP_PERF_
+
+#include <rte_mempool.h>
+
+struct comp_test_data;
+
+typedef void  *(*cperf_constructor_t)(
+		uint8_t dev_id,
+		uint16_t qp_id,
+		struct comp_test_data *options);
+
+typedef int (*cperf_runner_t)(void *test_ctx);
+typedef void (*cperf_destructor_t)(void *test_ctx);
+
+struct cperf_test {
+	cperf_constructor_t constructor;
+	cperf_runner_t runner;
+	cperf_destructor_t destructor;
+};
+
+/* Needed for weak functions */
+
+void *
+cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused);
+
+void
+cperf_benchmark_test_destructor(void *arg __rte_unused);
+
+int
+cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+
+void *
+cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused);
+
+void
+cperf_verify_test_destructor(void *arg __rte_unused);
+
+int
+cperf_verify_test_runner(void *test_ctx __rte_unused);
+
+void *
+cperf_pmd_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused);
+
+void
+cperf_pmd_cyclecount_test_destructor(void *arg __rte_unused);
+
+int
+cperf_pmd_cyclecount_test_runner(void *test_ctx __rte_unused);
+
+#endif /* _COMP_PERF_ */
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index f87751d..79e63d5 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -13,6 +13,24 @@ 
 #define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM)
 #define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO))
 
+extern const char *cperf_test_type_strs[];
+
+/* Cleanup state machine */
+enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,
+	ST_DURING_TEST
+};
+
+enum cperf_perf_test_type {
+	CPERF_TEST_TYPE_BENCHMARK,
+	CPERF_TEST_TYPE_VERIFY,
+	CPERF_TEST_TYPE_PMDCC
+};
+
 enum comp_operation {
 	COMPRESS_ONLY,
 	DECOMPRESS_ONLY,
@@ -30,37 +48,26 @@  struct range_list {
 struct comp_test_data {
 	char driver_name[64];
 	char input_file[64];
-	struct rte_mbuf **comp_bufs;
-	struct rte_mbuf **decomp_bufs;
-	uint32_t total_bufs;
+	enum cperf_perf_test_type test;
+
 	uint8_t *input_data;
 	size_t input_data_sz;
-	uint8_t *compressed_data;
-	uint8_t *decompressed_data;
-	struct rte_mempool *comp_buf_pool;
-	struct rte_mempool *decomp_buf_pool;
-	struct rte_mempool *op_pool;
-	int8_t cdev_id;
+	uint16_t nb_qps;
 	uint16_t seg_sz;
 	uint16_t out_seg_sz;
 	uint16_t burst_sz;
 	uint32_t pool_sz;
 	uint32_t num_iter;
 	uint16_t max_sgl_segs;
+
 	enum rte_comp_huffman huffman_enc;
 	enum comp_operation test_op;
 	int window_sz;
-	struct range_list level;
-	/* Store TSC duration for all levels (including level 0) */
-	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
-	size_t comp_data_sz;
-	size_t decomp_data_sz;
+	struct range_list level_lst;
+	uint8_t level;
+
 	double ratio;
-	double comp_gbps;
-	double decomp_gbps;
-	double comp_tsc_byte;
-	double decomp_tsc_byte;
+	enum cleanup_st cleanup;
 };
 
 int
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 2fb6fb4..bc4b98a 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -466,19 +466,20 @@  parse_level(struct comp_test_data *test_data, const char *arg)
 	 * Try parsing the argument as a range, if it fails,
 	 * arse it as a list
 	 */
-	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
-			&test_data->level.inc) < 0) {
-		ret = parse_list(arg, test_data->level.list,
-					&test_data->level.min,
-					&test_data->level.max);
+	if (parse_range(arg, &test_data->level_lst.min,
+			&test_data->level_lst.max,
+			&test_data->level_lst.inc) < 0) {
+		ret = parse_list(arg, test_data->level_lst.list,
+					&test_data->level_lst.min,
+					&test_data->level_lst.max);
 		if (ret < 0) {
 			RTE_LOG(ERR, USER1,
 				"Failed to parse compression level/s\n");
 			return -1;
 		}
-		test_data->level.count = ret;
+		test_data->level_lst.count = ret;
 
-		if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) {
 			RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
 					RTE_COMP_LEVEL_MAX);
 			return -1;
@@ -498,7 +499,6 @@  struct long_opt_parser {
 };
 
 static struct option lgopts[] = {
-
 	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
 	{ CPERF_TEST_FILE, required_argument, 0, 0 },
 	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
@@ -572,7 +572,6 @@  comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
 void
 comp_perf_options_default(struct comp_test_data *test_data)
 {
-	test_data->cdev_id = -1;
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
@@ -581,9 +580,10 @@  comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
-	test_data->level.min = 1;
-	test_data->level.max = 9;
-	test_data->level.inc = 1;
+	test_data->level_lst.min = 1;
+	test_data->level_lst.max = 9;
+	test_data->level_lst.inc = 1;
+	test_data->test = CPERF_TEST_TYPE_BENCHMARK;
 }
 
 int
diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c
new file mode 100644
index 0000000..1d2b25e
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_common.c
@@ -0,0 +1,285 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_verify.h"
+#include "comp_perf_test_benchmark.h"
+#include "comp_perf.h"
+#include "comp_perf_test_common.h"
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	unsigned int next_size;
+
+	/* Check lower/upper bounds */
+	if (size < range->min)
+		return -1;
+
+	if (size > range->max)
+		return -1;
+
+	/* If range is actually only one value, size is correct */
+	if (range->increment == 0)
+		return 0;
+
+	/* Check if value is one of the supported sizes */
+	for (next_size = range->min; next_size <= range->max;
+			next_size += range->increment)
+		if (size == next_size)
+			return 0;
+
+	return -1;
+}
+
+static uint32_t
+find_buf_size(uint32_t input_size)
+{
+	uint32_t i;
+
+	/* From performance point of view the buffer size should be a
+	 * power of 2 but also should be enough to store incompressible data
+	 */
+
+	/* We're looking for nearest power of 2 buffer size, which is greater
+	 * than input_size
+	 */
+	uint32_t size =
+		!input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
+
+	for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
+		;
+
+	return i > ((UINT16_MAX + 1) >> 1)
+			? (uint32_t)((float)input_size * EXPANSE_RATIO)
+			: i;
+}
+
+void
+comp_perf_free_memory(struct cperf_mem_resources *mem)
+{
+	uint32_t i;
+
+	for (i = 0; i < mem->total_bufs; i++) {
+		rte_pktmbuf_free(mem->comp_bufs[i]);
+		rte_pktmbuf_free(mem->decomp_bufs[i]);
+	}
+
+	rte_free(mem->decomp_bufs);
+	rte_free(mem->comp_bufs);
+	rte_free(mem->decompressed_data);
+	rte_free(mem->compressed_data);
+	rte_mempool_free(mem->op_pool);
+	rte_mempool_free(mem->decomp_buf_pool);
+	rte_mempool_free(mem->comp_buf_pool);
+}
+
+int
+comp_perf_allocate_memory(struct comp_test_data *test_data,
+			  struct cperf_mem_resources *mem)
+{
+	test_data->out_seg_sz = find_buf_size(test_data->seg_sz);
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+			test_data->seg_sz);
+	char pool_name[32] = "";
+
+	snprintf(pool_name, sizeof(pool_name), "comp_buf_pool_%u_qp_%u",
+			mem->dev_id, mem->qp_id);
+	mem->comp_buf_pool = rte_pktmbuf_pool_create(pool_name,
+				total_segs,
+				0, 0,
+				test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (mem->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	snprintf(pool_name, sizeof(pool_name), "decomp_buf_pool_%u_qp_%u",
+			mem->dev_id, mem->qp_id);
+	mem->decomp_buf_pool = rte_pktmbuf_pool_create(pool_name,
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (mem->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	mem->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
+			mem->dev_id, mem->qp_id);
+	mem->op_pool = rte_comp_op_pool_create(pool_name,
+				  mem->total_bufs,
+				  0, 0, rte_socket_id());
+	if (mem->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	mem->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+						+ MIN_COMPRESSED_BUF_SIZE, 0,
+				rte_socket_id());
+	if (mem->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		return -1;
+	}
+
+	mem->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (mem->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		return -1;
+	}
+
+	mem->comp_bufs = rte_zmalloc_socket(NULL,
+			mem->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (mem->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	mem->decomp_bufs = rte_zmalloc_socket(NULL,
+			mem->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (mem->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+int
+prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
+{
+	uint32_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	size_t data_sz;
+	uint8_t *data_addr;
+	uint32_t i, j;
+
+	for (i = 0; i < mem->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		mem->decomp_bufs[i] =
+			rte_pktmbuf_alloc(mem->decomp_buf_pool);
+		if (mem->decomp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					mem->decomp_bufs[i], data_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(mem->decomp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				data_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(mem->decomp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		mem->comp_bufs[i] =
+			rte_pktmbuf_alloc(mem->comp_buf_pool);
+		if (mem->comp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					mem->comp_bufs[i],
+					test_data->out_seg_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+
+		/* Chain mbufs if needed for output mbufs */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(mem->comp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				test_data->out_seg_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			if (rte_pktmbuf_chain(mem->comp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
new file mode 100644
index 0000000..9c11e3a
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -0,0 +1,41 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_COMMON_H_
+#define _COMP_PERF_TEST_COMMON_H_
+
+#include <stdint.h>
+
+#include <rte_mempool.h>
+
+struct cperf_mem_resources {
+	uint8_t dev_id;
+	uint16_t qp_id;
+	uint8_t lcore_id;
+	uint32_t total_bufs;
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+
+	struct rte_mbuf **comp_bufs;
+	struct rte_mbuf **decomp_bufs;
+
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+};
+
+int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range);
+
+void
+comp_perf_free_memory(struct cperf_mem_resources *mem);
+
+int
+comp_perf_allocate_memory(struct comp_test_data *test_data,
+			  struct cperf_mem_resources *mem);
+
+int
+prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
+
+#endif /* _COMP_PERF_TEST_COMMON_H_ */
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 7b9ee74..d142b91 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -8,56 +8,44 @@ 
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf.h"
+#include "comp_perf_test_common.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
 
-#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
-
-/* Cleanup state machine */
-static enum cleanup_st {
-	ST_CLEAR = 0,
-	ST_TEST_DATA,
-	ST_COMPDEV,
-	ST_INPUT_DATA,
-	ST_MEMORY_ALLOC,
-	ST_PREPARE_BUF,
-	ST_DURING_TEST
-} cleanup = ST_CLEAR;
-
-static int
-param_range_check(uint16_t size, const struct rte_param_log2_range *range)
-{
-	unsigned int next_size;
-
-	/* Check lower/upper bounds */
-	if (size < range->min)
-		return -1;
-
-	if (size > range->max)
-		return -1;
-
-	/* If range is actually only one value, size is correct */
-	if (range->increment == 0)
-		return 0;
-
-	/* Check if value is one of the supported sizes */
-	for (next_size = range->min; next_size <= range->max;
-			next_size += range->increment)
-		if (size == next_size)
-			return 0;
-
-	return -1;
-}
+__extension__
+const char *cperf_test_type_strs[] = {
+	[CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
+	[CPERF_TEST_TYPE_VERIFY] = "verify",
+	[CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount"
+};
+
+__extension__
+static const struct cperf_test cperf_testmap[] = {
+	[CPERF_TEST_TYPE_BENCHMARK] = {
+			cperf_benchmark_test_constructor,
+			cperf_benchmark_test_runner,
+			cperf_benchmark_test_destructor
+	},
+	[CPERF_TEST_TYPE_VERIFY] = {
+			cperf_verify_test_constructor,
+			cperf_verify_test_runner,
+			cperf_verify_test_destructor
+	},
+	[CPERF_TEST_TYPE_PMDCC] = {
+			cperf_pmd_cyclecount_test_constructor,
+			cperf_pmd_cyclecount_test_runner,
+			cperf_pmd_cyclecount_test_destructor
+	}
+};
 
 static int
-comp_perf_check_capabilities(struct comp_test_data *test_data)
+comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id)
 {
 	const struct rte_compressdev_capabilities *cap;
 
-	cap = rte_compressdev_capability_get(test_data->cdev_id,
+	cap = rte_compressdev_capability_get(cdev_id,
 					     RTE_COMP_ALGO_DEFLATE);
 
 	if (cap == NULL) {
@@ -105,7 +93,7 @@  comp_perf_check_capabilities(struct comp_test_data *test_data)
 	}
 
 	/* Level 0 support */
-	if (test_data->level.min == 0 &&
+	if (test_data->level_lst.min == 0 &&
 			(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
 		RTE_LOG(ERR, USER1, "Compress device does not support "
 				"level 0 (no compression)\n");
@@ -115,110 +103,108 @@  comp_perf_check_capabilities(struct comp_test_data *test_data)
 	return 0;
 }
 
-static uint32_t
-find_buf_size(uint32_t input_size)
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data,
+				 uint8_t *enabled_cdevs)
 {
-	uint32_t i;
+	uint8_t enabled_cdev_count, nb_lcores, cdev_id;
+	unsigned int i, j;
+	int ret;
 
-	/* From performance point of view the buffer size should be a
-	 * power of 2 but also should be enough to store incompressible data
-	 */
+	enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
+			enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
+	if (enabled_cdev_count == 0) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
 
-	/* We're looking for nearest power of 2 buffer size, which is greather
-	 * than input_size
+	nb_lcores = rte_lcore_count() - 1;
+	/*
+	 * Use fewer devices,
+	 * if there are more available than cores.
 	 */
-	uint32_t size =
-		!input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
-
-	for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
-		;
-
-	return i > ((UINT16_MAX + 1) >> 1)
-			? (uint32_t)((float)input_size * EXPANSE_RATIO)
-			: i;
-}
-
-static int
-comp_perf_allocate_memory(struct comp_test_data *test_data)
-{
+	if (enabled_cdev_count > nb_lcores) {
+		enabled_cdev_count = nb_lcores;
+		RTE_LOG(INFO, USER1,
+			" There's more available devices than cores!"
+			" The number of devices has been aligned to %d cores\n",
+			nb_lcores);
+	}
 
-	test_data->out_seg_sz = find_buf_size(test_data->seg_sz);
-	/* Number of segments for input and output
-	 * (compression and decompression)
+	/*
+	 * Calculate the number of needed queue pairs, based on the number
+	 * of available logical cores and compression devices.
+	 * For instance, if there are 4 cores and 2 compression devices,
+	 * 2 queue pairs will be set up per device.
+	 * One queue pair per one core.
+	 * E.g. if there are 3 cores and 2 compression devices,
+	 * 2 queue pairs will be set up per device, but one queue pair
+	 * will be left unused on the last device.
 	 */
-	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
-			test_data->seg_sz);
-	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
-				total_segs,
-				0, 0,
-				test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
-				rte_socket_id());
-	if (test_data->comp_buf_pool == NULL) {
-		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
-		return -1;
-	}
+	test_data->nb_qps = (nb_lcores % enabled_cdev_count) ?
+				(nb_lcores / enabled_cdev_count) + 1 :
+				nb_lcores / enabled_cdev_count;
 
-	cleanup = ST_MEMORY_ALLOC;
-	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
-				total_segs,
-				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
-				rte_socket_id());
-	if (test_data->decomp_buf_pool == NULL) {
-		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
-		return -1;
-	}
+	for (i = 0; i < enabled_cdev_count &&
+			i < RTE_COMPRESS_MAX_DEVS; i++,
+					nb_lcores -= test_data->nb_qps) {
+		cdev_id = enabled_cdevs[i];
 
-	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+		struct rte_compressdev_info cdev_info;
+		uint8_t socket_id = rte_compressdev_socket_id(cdev_id);
 
-	test_data->op_pool = rte_comp_op_pool_create("op_pool",
-				  test_data->total_bufs,
-				  0, 0, rte_socket_id());
-	if (test_data->op_pool == NULL) {
-		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
-		return -1;
-	}
+		rte_compressdev_info_get(cdev_id, &cdev_info);
+		if (cdev_info.max_nb_queue_pairs &&
+			test_data->nb_qps > cdev_info.max_nb_queue_pairs) {
+			RTE_LOG(ERR, USER1,
+				"Number of needed queue pairs is higher "
+				"than the maximum number of queue pairs "
+				"per device.\n");
+			RTE_LOG(ERR, USER1,
+				"Lower the number of cores or increase "
+				"the number of crypto devices\n");
+			return -EINVAL;
+		}
 
-	/*
-	 * Compressed data might be a bit larger than input data,
-	 * if data cannot be compressed
-	 */
-	test_data->compressed_data = rte_zmalloc_socket(NULL,
-				test_data->input_data_sz * EXPANSE_RATIO
-						+ MIN_COMPRESSED_BUF_SIZE, 0,
-				rte_socket_id());
-	if (test_data->compressed_data == NULL) {
-		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
-				"file could not be allocated\n");
-		return -1;
-	}
+		if (comp_perf_check_capabilities(test_data, cdev_id) < 0)
+			return -EINVAL;
+
+		/* Configure compressdev */
+		struct rte_compressdev_config config = {
+			.socket_id = socket_id,
+			.nb_queue_pairs = nb_lcores > test_data->nb_qps
+					? test_data->nb_qps : nb_lcores,
+			.max_nb_priv_xforms = NUM_MAX_XFORMS,
+			.max_nb_streams = 0
+		};
+
+		if (rte_compressdev_configure(cdev_id, &config) < 0) {
+			RTE_LOG(ERR, USER1, "Device configuration failed\n");
+			return -EINVAL;
+		}
 
-	test_data->decompressed_data = rte_zmalloc_socket(NULL,
-				test_data->input_data_sz, 0,
-				rte_socket_id());
-	if (test_data->decompressed_data == NULL) {
-		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
-				"file could not be allocated\n");
-		return -1;
-	}
+		for (j = 0; j < test_data->nb_qps; j++) {
+			ret = rte_compressdev_queue_pair_setup(cdev_id, j,
+					NUM_MAX_INFLIGHT_OPS, socket_id);
+			if (ret < 0) {
+				RTE_LOG(ERR, USER1,
+			      "Failed to setup queue pair %u on compressdev %u",
+					j, cdev_id);
+				return -EINVAL;
+			}
+		}
 
-	test_data->comp_bufs = rte_zmalloc_socket(NULL,
-			test_data->total_bufs * sizeof(struct rte_mbuf *),
-			0, rte_socket_id());
-	if (test_data->comp_bufs == NULL) {
-		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
-				" could not be allocated\n");
-		return -1;
+		ret = rte_compressdev_start(cdev_id);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to start device %u: error %d\n",
+				cdev_id, ret);
+			return -EPERM;
+		}
 	}
 
-	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
-			test_data->total_bufs * sizeof(struct rte_mbuf *),
-			0, rte_socket_id());
-	if (test_data->decomp_bufs == NULL) {
-		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
-				" could not be allocated\n");
-		return -1;
-	}
-	return 0;
+	return enabled_cdev_count;
 }
 
 static int
@@ -295,187 +281,18 @@  comp_perf_dump_input_data(struct comp_test_data *test_data)
 	return ret;
 }
 
-static int
-comp_perf_initialize_compressdev(struct comp_test_data *test_data)
-{
-	uint8_t enabled_cdev_count;
-	uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
-
-	enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
-			enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
-	if (enabled_cdev_count == 0) {
-		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
-				test_data->driver_name);
-		return -EINVAL;
-	}
-
-	if (enabled_cdev_count > 1)
-		RTE_LOG(INFO, USER1,
-			"Only the first compress device will be used\n");
-
-	test_data->cdev_id = enabled_cdevs[0];
-
-	if (comp_perf_check_capabilities(test_data) < 0)
-		return -1;
-
-	/* Configure compressdev (one device, one queue pair) */
-	struct rte_compressdev_config config = {
-		.socket_id = rte_socket_id(),
-		.nb_queue_pairs = 1,
-		.max_nb_priv_xforms = NUM_MAX_XFORMS,
-		.max_nb_streams = 0
-	};
-
-	if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
-		RTE_LOG(ERR, USER1, "Device configuration failed\n");
-		return -1;
-	}
-
-	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
-			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
-		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
-		return -1;
-	}
-
-	if (rte_compressdev_start(test_data->cdev_id) < 0) {
-		RTE_LOG(ERR, USER1, "Device could not be started\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int
-prepare_bufs(struct comp_test_data *test_data)
-{
-	uint32_t remaining_data = test_data->input_data_sz;
-	uint8_t *input_data_ptr = test_data->input_data;
-	size_t data_sz;
-	uint8_t *data_addr;
-	uint32_t i, j;
-
-	for (i = 0; i < test_data->total_bufs; i++) {
-		/* Allocate data in input mbuf and copy data from input file */
-		test_data->decomp_bufs[i] =
-			rte_pktmbuf_alloc(test_data->decomp_buf_pool);
-		if (test_data->decomp_bufs[i] == NULL) {
-			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
-			return -1;
-		}
-
-		cleanup = ST_PREPARE_BUF;
-		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
-		data_addr = (uint8_t *) rte_pktmbuf_append(
-					test_data->decomp_bufs[i], data_sz);
-		if (data_addr == NULL) {
-			RTE_LOG(ERR, USER1, "Could not append data\n");
-			return -1;
-		}
-		rte_memcpy(data_addr, input_data_ptr, data_sz);
-
-		input_data_ptr += data_sz;
-		remaining_data -= data_sz;
-
-		/* Already one segment in the mbuf */
-		uint16_t segs_per_mbuf = 1;
-
-		/* Chain mbufs if needed for input mbufs */
-		while (segs_per_mbuf < test_data->max_sgl_segs
-				&& remaining_data > 0) {
-			struct rte_mbuf *next_seg =
-				rte_pktmbuf_alloc(test_data->decomp_buf_pool);
-
-			if (next_seg == NULL) {
-				RTE_LOG(ERR, USER1,
-					"Could not allocate mbuf\n");
-				return -1;
-			}
-
-			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
-			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
-				data_sz);
-
-			if (data_addr == NULL) {
-				RTE_LOG(ERR, USER1, "Could not append data\n");
-				return -1;
-			}
-
-			rte_memcpy(data_addr, input_data_ptr, data_sz);
-			input_data_ptr += data_sz;
-			remaining_data -= data_sz;
-
-			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
-					next_seg) < 0) {
-				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
-				return -1;
-			}
-			segs_per_mbuf++;
-		}
-
-		/* Allocate data in output mbuf */
-		test_data->comp_bufs[i] =
-			rte_pktmbuf_alloc(test_data->comp_buf_pool);
-		if (test_data->comp_bufs[i] == NULL) {
-			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
-			return -1;
-		}
-		data_addr = (uint8_t *) rte_pktmbuf_append(
-					test_data->comp_bufs[i],
-					test_data->out_seg_sz);
-		if (data_addr == NULL) {
-			RTE_LOG(ERR, USER1, "Could not append data\n");
-			return -1;
-		}
-
-		/* Chain mbufs if needed for output mbufs */
-		for (j = 1; j < segs_per_mbuf; j++) {
-			struct rte_mbuf *next_seg =
-				rte_pktmbuf_alloc(test_data->comp_buf_pool);
-
-			if (next_seg == NULL) {
-				RTE_LOG(ERR, USER1,
-					"Could not allocate mbuf\n");
-				return -1;
-			}
-
-			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
-				test_data->out_seg_sz);
-
-			if (data_addr == NULL) {
-				RTE_LOG(ERR, USER1, "Could not append data\n");
-				return -1;
-			}
-
-			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
-					next_seg) < 0) {
-				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
-				return -1;
-			}
-		}
-	}
-
-	return 0;
-}
-
-static void
-free_bufs(struct comp_test_data *test_data)
-{
-	uint32_t i;
-
-	for (i = 0; i < test_data->total_bufs; i++) {
-		rte_pktmbuf_free(test_data->comp_bufs[i]);
-		rte_pktmbuf_free(test_data->decomp_bufs[i]);
-	}
-}
-
-
-
 int
 main(int argc, char **argv)
 {
-	uint8_t level, level_idx = 0;
+	uint8_t level_idx = 0;
 	int ret, i;
 	struct comp_test_data *test_data;
+	void *ctx[RTE_MAX_LCORE] = {};
+	uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
+	int nb_compressdevs = 0;
+	uint16_t total_nb_qps = 0;
+	uint8_t cdev_id;
+	uint32_t lcore_id;
 
 	/* Initialise DPDK EAL */
 	ret = rte_eal_init(argc, argv);
@@ -492,7 +309,7 @@  main(int argc, char **argv)
 				rte_socket_id());
 
 	ret = EXIT_SUCCESS;
-	cleanup = ST_TEST_DATA;
+	test_data->cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
 	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
@@ -507,99 +324,112 @@  main(int argc, char **argv)
 		goto end;
 	}
 
-	if (comp_perf_initialize_compressdev(test_data) < 0) {
-		ret = EXIT_FAILURE;
-		goto end;
-	}
+	nb_compressdevs =
+		comp_perf_initialize_compressdev(test_data, enabled_cdevs);
 
-	cleanup = ST_COMPDEV;
-	if (comp_perf_dump_input_data(test_data) < 0) {
+	if (nb_compressdevs < 1) {
 		ret = EXIT_FAILURE;
 		goto end;
 	}
 
-	cleanup = ST_INPUT_DATA;
-	if (comp_perf_allocate_memory(test_data) < 0) {
+	test_data->cleanup = ST_COMPDEV;
+	if (comp_perf_dump_input_data(test_data) < 0) {
 		ret = EXIT_FAILURE;
 		goto end;
 	}
 
-	if (prepare_bufs(test_data) < 0) {
-		ret = EXIT_FAILURE;
-		goto end;
-	}
+	test_data->cleanup = ST_INPUT_DATA;
 
-	if (test_data->level.inc != 0)
-		level = test_data->level.min;
+	if (test_data->level_lst.inc != 0)
+		test_data->level = test_data->level_lst.min;
 	else
-		level = test_data->level.list[0];
+		test_data->level = test_data->level_lst.list[0];
 
 	printf("App uses socket: %u\n", rte_socket_id());
-	printf("Driver uses socket: %u\n",
-	       rte_compressdev_socket_id(test_data->cdev_id));
 	printf("Burst size = %u\n", test_data->burst_sz);
 	printf("File size = %zu\n", test_data->input_data_sz);
 
-	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
-		"Level", "Comp size", "Comp ratio [%]",
-		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
-		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+	test_data->cleanup = ST_DURING_TEST;
+	total_nb_qps = nb_compressdevs * test_data->nb_qps;
 
-	cleanup = ST_DURING_TEST;
-	while (level <= test_data->level.max) {
+	i = 0;
+	uint8_t qp_id = 0, cdev_index = 0;
 
-		/*
-		 * Run a first iteration, to verify compression and
-		 * get the compression ratio for the level
-		 */
-		if (cperf_verification(test_data, level) != EXIT_SUCCESS)
-			break;
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 
-		/*
-		 * Run benchmarking test
-		 */
-		if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
+		if (i == total_nb_qps)
 			break;
 
-		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
-					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
-		       level, test_data->comp_data_sz, test_data->ratio,
-		       test_data->comp_tsc_duration[level],
-		       test_data->comp_tsc_byte, test_data->comp_gbps,
-		       test_data->decomp_tsc_duration[level],
-		       test_data->decomp_tsc_byte, test_data->decomp_gbps);
+		cdev_id = enabled_cdevs[cdev_index];
+		ctx[i] = cperf_testmap[test_data->test].constructor(
+							cdev_id, qp_id,
+							test_data);
+		if (ctx[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Test run constructor failed\n");
+			goto end;
+		}
+		qp_id = (qp_id + 1) % test_data->nb_qps;
+		if (qp_id == 0)
+			cdev_index++;
+		i++;
+	}
+
+	while (test_data->level <= test_data->level_lst.max) {
+
+		i = 0;
+		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+
+			if (i == total_nb_qps)
+				break;
 
-		if (test_data->level.inc != 0)
-			level += test_data->level.inc;
+			rte_eal_remote_launch(
+					cperf_testmap[test_data->test].runner,
+					ctx[i], lcore_id);
+			i++;
+		}
+		i = 0;
+		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+
+			if (i == total_nb_qps)
+				break;
+			ret |= rte_eal_wait_lcore(lcore_id);
+			i++;
+		}
+
+		if (ret != EXIT_SUCCESS)
+			break;
+
+		if (test_data->level_lst.inc != 0)
+			test_data->level += test_data->level_lst.inc;
 		else {
-			if (++level_idx == test_data->level.count)
+			if (++level_idx == test_data->level_lst.count)
 				break;
-			level = test_data->level.list[level_idx];
+			test_data->level = test_data->level_lst.list[level_idx];
 		}
 	}
 
 end:
-	switch (cleanup) {
+	switch (test_data->cleanup) {
 
 	case ST_DURING_TEST:
-	case ST_PREPARE_BUF:
-		free_bufs(test_data);
-		/* fallthrough */
-	case ST_MEMORY_ALLOC:
-		rte_free(test_data->decomp_bufs);
-		rte_free(test_data->comp_bufs);
-		rte_free(test_data->decompressed_data);
-		rte_free(test_data->compressed_data);
-		rte_mempool_free(test_data->op_pool);
-		rte_mempool_free(test_data->decomp_buf_pool);
-		rte_mempool_free(test_data->comp_buf_pool);
+		i = 0;
+		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+			if (i == total_nb_qps)
+				break;
+
+			if (ctx[i] && cperf_testmap[test_data->test].destructor)
+				cperf_testmap[test_data->test].destructor(
+									ctx[i]);
+			i++;
+		}
 		/* fallthrough */
 	case ST_INPUT_DATA:
 		rte_free(test_data->input_data);
 		/* fallthrough */
 	case ST_COMPDEV:
-		if (test_data->cdev_id != -1)
-			rte_compressdev_stop(test_data->cdev_id);
+		for (i = 0; i < nb_compressdevs &&
+				i < RTE_COMPRESS_MAX_DEVS; i++)
+			rte_compressdev_stop(enabled_cdevs[i]);
 		/* fallthrough */
 	case ST_TEST_DATA:
 		rte_free(test_data);
@@ -616,3 +446,65 @@  main(int argc, char **argv)
 	}
 	return ret;
 }
+
+__rte_weak void *
+cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused)
+{
+	RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n");
+	return NULL; /* weak stub: no test context is built */
+}
+
+__rte_weak void
+cperf_benchmark_test_destructor(void *arg __rte_unused)
+{
+	/* Weak stub: intentionally empty, releases nothing. */
+}
+
+__rte_weak int
+cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+{
+	return 0; /* weak stub: performs no work, returns 0 */
+}
+__rte_weak void *
+cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused)
+{
+	RTE_LOG(INFO, USER1, "Verify test is not supported yet\n");
+	return NULL; /* weak stub: no test context is built */
+}
+
+__rte_weak void
+cperf_verify_test_destructor(void *arg __rte_unused)
+{
+	/* Weak stub: intentionally empty, releases nothing. */
+}
+
+__rte_weak int
+cperf_verify_test_runner(void *test_ctx __rte_unused)
+{
+	return 0; /* weak stub: performs no work, returns 0 */
+}
+
+__rte_weak void *
+cperf_pmd_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused)
+{
+	RTE_LOG(INFO, USER1, "Pmd_cyclecount test is not supported yet\n");
+	return NULL; /* weak stub: no test context is built */
+}
+
+__rte_weak void
+cperf_pmd_cyclecount_test_destructor(void *arg __rte_unused)
+{
+	/* Weak stub: intentionally empty, releases nothing. */
+}
+
+__rte_weak int
+cperf_pmd_cyclecount_test_runner(void *test_ctx __rte_unused)
+{
+	return 0; /* weak stub: performs no work, returns 0 */
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index ec73e5e..00413c6 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -4,6 +4,5 @@ 
 allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
 		'main.c',
-		'comp_perf_test_verify.c',
-		'comp_perf_test_benchmark.c')
+		'comp_perf_test_common.c')
 deps = ['compressdev']