[v4,1/1] app/test-compress-perf: report header improvement

Message ID 20190712104321.24845-2-arturx.trybula@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series add extra features to test-compress-perf |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply issues

Commit Message

Artur Trybula July 12, 2019, 10:43 a.m. UTC
  This patch adds extra features to the compress performance
test. Some important parameters (memory allocation,
number of ops, number of segments) are calculated and
printed out on the screen.
Information about compression threads is also provided.

Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
---
 .../comp_perf_test_benchmark.c                | 21 ++++-
 .../comp_perf_test_common.c                   | 94 ++++++++++++++++++-
 .../comp_perf_test_common.h                   |  6 ++
 app/test-compress-perf/main.c                 |  4 +-
 4 files changed, 120 insertions(+), 5 deletions(-)
  

Comments

Shally Verma July 15, 2019, 12:47 p.m. UTC | #1
> -----Original Message-----
> From: Artur Trybula <arturx.trybula@intel.com>
> Sent: Friday, July 12, 2019 4:13 PM
> To: dev@dpdk.org; fiona.trahe@intel.com; Shally Verma
> <shallyv@marvell.com>; adamx.dybkowski@intel.com;
> arturx.trybula@intel.com; akhil.goyal@nxp.com
> Subject: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header
> improvement
> 
> External Email
> 
> ----------------------------------------------------------------------
> This patch adds extra features to the compress performance test. Some
> important parameters (memory allocation, number of ops, number of
> segments) are calculated and printed out on the screen.
> Information about compression threads is also provided.
> 
> Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> ---
>  .../comp_perf_test_benchmark.c                | 21 ++++-
>  .../comp_perf_test_common.c                   | 94 ++++++++++++++++++-
>  .../comp_perf_test_common.h                   |  6 ++
>  app/test-compress-perf/main.c                 |  4 +-
>  4 files changed, 120 insertions(+), 5 deletions(-)
> 
> diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c
> b/app/test-compress-perf/comp_perf_test_benchmark.c
> index aa1f8eea2..887459449 100644
> --- a/app/test-compress-perf/comp_perf_test_benchmark.c
> +++ b/app/test-compress-perf/comp_perf_test_benchmark.c
> @@ -329,9 +329,26 @@ cperf_benchmark_test_runner(void *test_ctx)
>  	struct comp_test_data *test_data = ctx->ver.options;
>  	uint32_t lcore = rte_lcore_id();
>  	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> +	int i, ret = EXIT_SUCCESS;
> 
>  	ctx->ver.mem.lcore_id = lcore;
> -	int i, ret = EXIT_SUCCESS;
> +
> +	/*
> +	 * printing information about current compression thread
> +	 */
> +	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
Question: runner() will be executed by each core so is ctx is thread local variable or global.? If it can be made thread local, then there would be no need of atomic here?

> +		printf("    lcore: %u,"
> +				" driver name: %s,"
> +				" device name: %s,"
> +				" device id: %u,"
> +				" socket id: %u,"
> +				" queue pair id: %u\n",
> +			lcore,
> +			ctx->ver.options->driver_name,
> +			rte_compressdev_name_get(ctx->ver.mem.dev_id),
> +			ctx->ver.mem.dev_id,
> +			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
> +			ctx->ver.mem.qp_id);
> 
>  	/*
>  	 * First the verification part is needed @@ -374,7 +391,7 @@
> cperf_benchmark_test_runner(void *test_ctx)
>  			1000000000;
> 
>  	if (rte_atomic16_test_and_set(&display_once)) {
> -		printf("%12s%6s%12s%17s%15s%16s\n",
> +		printf("\n%12s%6s%12s%17s%15s%16s\n",
>  			"lcore id", "Level", "Comp size", "Comp ratio [%]",
>  			"Comp [Gbps]", "Decomp [Gbps]");
>  	}
> diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-
> compress-perf/comp_perf_test_common.c
> index 472c76686..3dc9349b0 100644
> --- a/app/test-compress-perf/comp_perf_test_common.c
> +++ b/app/test-compress-perf/comp_perf_test_common.c
> @@ -16,6 +16,18 @@
> 
>  #define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> 
> +struct performance_tests_results {
> +	uint16_t total_segments;
> +	uint16_t segment_sz;
> +	uint16_t last_segment_sz;
> +	uint32_t total_buffs;	      /*number of buffers = number of ops*/
> +	uint16_t segments_per_buff;
> +	uint16_t segments_per_last_buff;
> +	size_t input_data_sz;
> +};
These looks more like test configuration than result. If you agree, then can rename it to test_config .. 

> +
> +static struct performance_tests_results tests_res;
> +
>  int
>  param_range_check(uint16_t size, const struct rte_param_log2_range
> *range)  { @@ -170,6 +182,13 @@ comp_perf_allocate_memory(struct
> comp_test_data *test_data,
>  				" could not be allocated\n");
>  		return -1;
>  	}
> +
> +	tests_res.total_segments = total_segs;
> +	tests_res.segment_sz = test_data->seg_sz;
> +	tests_res.total_buffs = mem->total_bufs;
> +	tests_res.segments_per_buff = test_data->max_sgl_segs;
> +	tests_res.input_data_sz = test_data->input_data_sz;
> +
>  	return 0;
>  }
> 
> @@ -178,9 +197,10 @@ prepare_bufs(struct comp_test_data *test_data,
> struct cperf_mem_resources *mem)  {
>  	uint32_t remaining_data = test_data->input_data_sz;
>  	uint8_t *input_data_ptr = test_data->input_data;
> -	size_t data_sz;
> +	size_t data_sz = 0;
>  	uint8_t *data_addr;
>  	uint32_t i, j;
> +	uint16_t segs_per_mbuf = 0;
>
Minimum segment per mbuf is 1. Then why don’t we initialize it to 1 here?
 
>  	for (i = 0; i < mem->total_bufs; i++) {
>  		/* Allocate data in input mbuf and copy data from input file
> */ @@ -204,7 +224,7 @@ prepare_bufs(struct comp_test_data *test_data,
> struct cperf_mem_resources *mem)
>  		remaining_data -= data_sz;
> 
>  		/* Already one segment in the mbuf */
> -		uint16_t segs_per_mbuf = 1;
> +		segs_per_mbuf = 1;
> 
>  		/* Chain mbufs if needed for input mbufs */
>  		while (segs_per_mbuf < test_data->max_sgl_segs @@ -
> 281,5 +301,75 @@ prepare_bufs(struct comp_test_data *test_data, struct
> cperf_mem_resources *mem)
>  		}
>  	}
> 
> +	tests_res.segments_per_last_buff = segs_per_mbuf;
> +	tests_res.last_segment_sz = data_sz;
> +
>  	return 0;
>  }
> +
> +void
> +print_test_dynamics(void)
> +{
> +	uint32_t opt_total_segs = DIV_CEIL(tests_res.input_data_sz,
> +			MAX_SEG_SIZE);
> +
> +	if (tests_res.total_buffs > 1) {
> +		printf("\nWarning: for the current input parameters number"
A comma after 'input parameters' would improve readability here

> +				" of ops is higher than one, which may result"
> +				" in sub-optimal performance.\n");
> +		printf("To improve the performance (for the current"
> +				" input data) following parameters are"
> +				" suggested:\n");
> +		printf("	• Segment size: %d\n", MAX_SEG_SIZE);
> +		printf("	• Number of segments: %u\n", opt_total_segs);
> +	} else if (tests_res.total_buffs == 1) {
> +		printf("\nWarning: there is only one op with %u segments –"
May "Warning:" be replaced with "Info: " here.

> +				" the compression ratio is the best.\n",
> +			tests_res.segments_per_last_buff);
> +		if (tests_res.segment_sz < MAX_SEG_SIZE)
> +			printf("To reduce compression time, please use"
> +					" bigger segment size: %d.\n",
> +				MAX_SEG_SIZE);
> +		else if (tests_res.segment_sz == MAX_SEG_SIZE)
> +			printf("Segment size is optimal for the best"
> +					" performance.\n");
> +	} else
> +		printf("Warning: something wrong happened!!\n");
> +
> +	printf("\nFor the current input parameters (segment size = %u,"
> +			" segments number = %u):\n",
> +		tests_res.segment_sz,
> +		tests_res.segments_per_buff);
> +	printf("	• Total number of buffers: %d\n",
> +		tests_res.total_segments);
> +	printf("	• %u buffer(s) %u bytes long, last buffer %u"
> +			" byte(s) long\n",
> +		tests_res.total_segments - 1,
> +		tests_res.segment_sz,
> +		tests_res.last_segment_sz);
> +	printf("	• Number of ops: %u\n", tests_res.total_buffs);
> +	printf("	• Total memory allocation: %u\n",
> +		(tests_res.total_segments - 1) * tests_res.segment_sz
> +		+ tests_res.last_segment_sz);
> +	if (tests_res.total_buffs > 1)
> +		printf("	• %u ops: %u segment(s) in each,"
> +				" segment size %u\n",
> +			tests_res.total_buffs - 1,
> +			tests_res.segments_per_buff,
> +			tests_res.segment_sz);
> +	if (tests_res.segments_per_last_buff > 1) {
> +		printf("	• 1 op %u segments:\n",
> +				tests_res.segments_per_last_buff);
> +		printf("		o %u segment size %u\n",
> +			tests_res.segments_per_last_buff - 1,
> +			tests_res.segment_sz);
> +		printf("		o last segment size %u\n",
> +			tests_res.last_segment_sz);
> +	} else if (tests_res.segments_per_last_buff == 1) {
> +		printf("	• 1 op (the last one): %u segment %u"
> +				" byte(s) long\n\n",
> +			tests_res.segments_per_last_buff,
> +			tests_res.last_segment_sz);
> +	}
Probably this if and else if here can be replaced by just 1 statement.


> +	printf("\n");
> +}
> diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-
> compress-perf/comp_perf_test_common.h
> index 9c11e3a00..c9e0c9081 100644
> --- a/app/test-compress-perf/comp_perf_test_common.h
> +++ b/app/test-compress-perf/comp_perf_test_common.h
> @@ -13,6 +13,9 @@ struct cperf_mem_resources {
>  	uint8_t dev_id;
>  	uint16_t qp_id;
>  	uint8_t lcore_id;
> +
> +	rte_atomic16_t print_info_once;
> +
>  	uint32_t total_bufs;
>  	uint8_t *compressed_data;
>  	uint8_t *decompressed_data;
> @@ -38,4 +41,7 @@ comp_perf_allocate_memory(struct comp_test_data
> *test_data,  int  prepare_bufs(struct comp_test_data *test_data, struct

> cperf_mem_resources *mem);
> 
> +void
> +print_test_dynamics(void);
> +
>  #endif /* _COMP_PERF_TEST_COMMON_H_ */
> diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
> index e746e4708..e7ac412e6 100644
> --- a/app/test-compress-perf/main.c
> +++ b/app/test-compress-perf/main.c
> @@ -363,7 +363,7 @@ main(int argc, char **argv)
> 
>  	printf("App uses socket: %u\n", rte_socket_id());
>  	printf("Burst size = %u\n", test_data->burst_sz);
> -	printf("File size = %zu\n", test_data->input_data_sz);
> +	printf("Input data size = %zu\n", test_data->input_data_sz);
> 
>  	test_data->cleanup = ST_DURING_TEST;
>  	total_nb_qps = nb_compressdevs * test_data->nb_qps; @@ -390,6
> +390,8 @@ main(int argc, char **argv)
>  		i++;
>  	}
> 
> +	print_test_dynamics(); /* constructors must be executed first */
> +
>  	while (test_data->level <= test_data->level_lst.max) {
> 
>  		i = 0;
> --
> 2.17.1
  
Artur Trybula July 15, 2019, 2:01 p.m. UTC | #2
-----Original Message-----
From: Shally Verma [mailto:shallyv@marvell.com] 
Sent: Monday, July 15, 2019 14:47
To: Trybula, ArturX <arturx.trybula@intel.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; Dybkowski, AdamX <adamx.dybkowski@intel.com>; akhil.goyal@nxp.com
Subject: RE: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header improvement



> -----Original Message-----
> From: Artur Trybula <arturx.trybula@intel.com>
> Sent: Friday, July 12, 2019 4:13 PM
> To: dev@dpdk.org; fiona.trahe@intel.com; Shally Verma 
> <shallyv@marvell.com>; adamx.dybkowski@intel.com; 
> arturx.trybula@intel.com; akhil.goyal@nxp.com
> Subject: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header 
> improvement
> 
> External Email
> 
> ----------------------------------------------------------------------
> This patch adds extra features to the compress performance test. Some 
> important parameters (memory allocation, number of ops, number of
> segments) are calculated and printed out on the screen.
> Information about compression threads is also provided.
> 
> Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> ---
>  .../comp_perf_test_benchmark.c                | 21 ++++-
>  .../comp_perf_test_common.c                   | 94 ++++++++++++++++++-
>  .../comp_perf_test_common.h                   |  6 ++
>  app/test-compress-perf/main.c                 |  4 +-
>  4 files changed, 120 insertions(+), 5 deletions(-)
> 
> diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c
> b/app/test-compress-perf/comp_perf_test_benchmark.c
> index aa1f8eea2..887459449 100644
> --- a/app/test-compress-perf/comp_perf_test_benchmark.c
> +++ b/app/test-compress-perf/comp_perf_test_benchmark.c
> @@ -329,9 +329,26 @@ cperf_benchmark_test_runner(void *test_ctx)
>  	struct comp_test_data *test_data = ctx->ver.options;
>  	uint32_t lcore = rte_lcore_id();
>  	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> +	int i, ret = EXIT_SUCCESS;
> 
>  	ctx->ver.mem.lcore_id = lcore;
> -	int i, ret = EXIT_SUCCESS;
> +
> +	/*
> +	 * printing information about current compression thread
> +	 */
> +	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
Question: runner() will be executed by each core so is ctx is thread local variable or global.? If it can be made thread local, then there would be no need of atomic here?
[Artur] Runners are restarted for each compression level. It's enough to have the data printed only once. 

> +		printf("    lcore: %u,"
> +				" driver name: %s,"
> +				" device name: %s,"
> +				" device id: %u,"
> +				" socket id: %u,"
> +				" queue pair id: %u\n",
> +			lcore,
> +			ctx->ver.options->driver_name,
> +			rte_compressdev_name_get(ctx->ver.mem.dev_id),
> +			ctx->ver.mem.dev_id,
> +			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
> +			ctx->ver.mem.qp_id);
> 
>  	/*
>  	 * First the verification part is needed @@ -374,7 +391,7 @@ 
> cperf_benchmark_test_runner(void *test_ctx)
>  			1000000000;
> 
>  	if (rte_atomic16_test_and_set(&display_once)) {
> -		printf("%12s%6s%12s%17s%15s%16s\n",
> +		printf("\n%12s%6s%12s%17s%15s%16s\n",
>  			"lcore id", "Level", "Comp size", "Comp ratio [%]",
>  			"Comp [Gbps]", "Decomp [Gbps]");
>  	}
> diff --git a/app/test-compress-perf/comp_perf_test_common.c 
> b/app/test- compress-perf/comp_perf_test_common.c
> index 472c76686..3dc9349b0 100644
> --- a/app/test-compress-perf/comp_perf_test_common.c
> +++ b/app/test-compress-perf/comp_perf_test_common.c
> @@ -16,6 +16,18 @@
> 
>  #define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> 
> +struct performance_tests_results {
> +	uint16_t total_segments;
> +	uint16_t segment_sz;
> +	uint16_t last_segment_sz;
> +	uint32_t total_buffs;	      /*number of buffers = number of ops*/
> +	uint16_t segments_per_buff;
> +	uint16_t segments_per_last_buff;
> +	size_t input_data_sz;
> +};
These looks more like test configuration than result. If you agree, then can rename it to test_config .. 
[Artur] These are exactly tests results, internal variables used by the algorithm. A kind of snapshot. Config by definition is used for configuration. In this case all the fields are only printed out.

> +
> +static struct performance_tests_results tests_res;
> +
>  int
>  param_range_check(uint16_t size, const struct rte_param_log2_range
> *range)  { @@ -170,6 +182,13 @@ comp_perf_allocate_memory(struct 
> comp_test_data *test_data,
>  				" could not be allocated\n");
>  		return -1;
>  	}
> +
> +	tests_res.total_segments = total_segs;
> +	tests_res.segment_sz = test_data->seg_sz;
> +	tests_res.total_buffs = mem->total_bufs;
> +	tests_res.segments_per_buff = test_data->max_sgl_segs;
> +	tests_res.input_data_sz = test_data->input_data_sz;
> +
>  	return 0;
>  }
> 
> @@ -178,9 +197,10 @@ prepare_bufs(struct comp_test_data *test_data, 
> struct cperf_mem_resources *mem)  {
>  	uint32_t remaining_data = test_data->input_data_sz;
>  	uint8_t *input_data_ptr = test_data->input_data;
> -	size_t data_sz;
> +	size_t data_sz = 0;
>  	uint8_t *data_addr;
>  	uint32_t i, j;
> +	uint16_t segs_per_mbuf = 0;
>
Minimum segment per mbuf is 1. Then why don’t we initialize it to 1 here?
[Artur] You are right Shally, if everything is fine with the input params and with the algorithm, but it should be left 0 in case of problems with mbufs allocation (current implementation).

 
>  	for (i = 0; i < mem->total_bufs; i++) {
>  		/* Allocate data in input mbuf and copy data from input file */ @@ 
> -204,7 +224,7 @@ prepare_bufs(struct comp_test_data *test_data, struct 
> cperf_mem_resources *mem)
>  		remaining_data -= data_sz;
> 
>  		/* Already one segment in the mbuf */
> -		uint16_t segs_per_mbuf = 1;
> +		segs_per_mbuf = 1;
> 
>  		/* Chain mbufs if needed for input mbufs */
>  		while (segs_per_mbuf < test_data->max_sgl_segs @@ -
> 281,5 +301,75 @@ prepare_bufs(struct comp_test_data *test_data, struct 
> cperf_mem_resources *mem)
>  		}
>  	}
> 
> +	tests_res.segments_per_last_buff = segs_per_mbuf;
> +	tests_res.last_segment_sz = data_sz;
> +
>  	return 0;
>  }
> +
> +void
> +print_test_dynamics(void)
> +{
> +	uint32_t opt_total_segs = DIV_CEIL(tests_res.input_data_sz,
> +			MAX_SEG_SIZE);
> +
> +	if (tests_res.total_buffs > 1) {
> +		printf("\nWarning: for the current input parameters number"
A comma after 'input parameters' would improve readability here
[Artur] - Ok

> +				" of ops is higher than one, which may result"
> +				" in sub-optimal performance.\n");
> +		printf("To improve the performance (for the current"
> +				" input data) following parameters are"
> +				" suggested:\n");
> +		printf("	• Segment size: %d\n", MAX_SEG_SIZE);
> +		printf("	• Number of segments: %u\n", opt_total_segs);
> +	} else if (tests_res.total_buffs == 1) {
> +		printf("\nWarning: there is only one op with %u segments –"
May "Warning:" be replaced with "Info: " here.
[Artur] Ok

> +				" the compression ratio is the best.\n",
> +			tests_res.segments_per_last_buff);
> +		if (tests_res.segment_sz < MAX_SEG_SIZE)
> +			printf("To reduce compression time, please use"
> +					" bigger segment size: %d.\n",
> +				MAX_SEG_SIZE);
> +		else if (tests_res.segment_sz == MAX_SEG_SIZE)
> +			printf("Segment size is optimal for the best"
> +					" performance.\n");
> +	} else
> +		printf("Warning: something wrong happened!!\n");
> +
> +	printf("\nFor the current input parameters (segment size = %u,"
> +			" segments number = %u):\n",
> +		tests_res.segment_sz,
> +		tests_res.segments_per_buff);
> +	printf("	• Total number of buffers: %d\n",
> +		tests_res.total_segments);
> +	printf("	• %u buffer(s) %u bytes long, last buffer %u"
> +			" byte(s) long\n",
> +		tests_res.total_segments - 1,
> +		tests_res.segment_sz,
> +		tests_res.last_segment_sz);
> +	printf("	• Number of ops: %u\n", tests_res.total_buffs);
> +	printf("	• Total memory allocation: %u\n",
> +		(tests_res.total_segments - 1) * tests_res.segment_sz
> +		+ tests_res.last_segment_sz);
> +	if (tests_res.total_buffs > 1)
> +		printf("	• %u ops: %u segment(s) in each,"
> +				" segment size %u\n",
> +			tests_res.total_buffs - 1,
> +			tests_res.segments_per_buff,
> +			tests_res.segment_sz);
> +	if (tests_res.segments_per_last_buff > 1) {
> +		printf("	• 1 op %u segments:\n",
> +				tests_res.segments_per_last_buff);
> +		printf("		o %u segment size %u\n",
> +			tests_res.segments_per_last_buff - 1,
> +			tests_res.segment_sz);
> +		printf("		o last segment size %u\n",
> +			tests_res.last_segment_sz);
> +	} else if (tests_res.segments_per_last_buff == 1) {
> +		printf("	• 1 op (the last one): %u segment %u"
> +				" byte(s) long\n\n",
> +			tests_res.segments_per_last_buff,
> +			tests_res.last_segment_sz);
> +	}
Probably this if and else if here can be replaced by just 1 statement.
[Artur] I think it's ok. This version is clear. If you like I can remove {} brackets from the "else if".


> +	printf("\n");
> +}
> diff --git a/app/test-compress-perf/comp_perf_test_common.h 
> b/app/test- compress-perf/comp_perf_test_common.h
> index 9c11e3a00..c9e0c9081 100644
> --- a/app/test-compress-perf/comp_perf_test_common.h
> +++ b/app/test-compress-perf/comp_perf_test_common.h
> @@ -13,6 +13,9 @@ struct cperf_mem_resources {
>  	uint8_t dev_id;
>  	uint16_t qp_id;
>  	uint8_t lcore_id;
> +
> +	rte_atomic16_t print_info_once;
> +
>  	uint32_t total_bufs;
>  	uint8_t *compressed_data;
>  	uint8_t *decompressed_data;
> @@ -38,4 +41,7 @@ comp_perf_allocate_memory(struct comp_test_data 
> *test_data,  int  prepare_bufs(struct comp_test_data *test_data, 
> struct

> cperf_mem_resources *mem);
> 
> +void
> +print_test_dynamics(void);
> +
>  #endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git 
> a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 
> e746e4708..e7ac412e6 100644
> --- a/app/test-compress-perf/main.c
> +++ b/app/test-compress-perf/main.c
> @@ -363,7 +363,7 @@ main(int argc, char **argv)
> 
>  	printf("App uses socket: %u\n", rte_socket_id());
>  	printf("Burst size = %u\n", test_data->burst_sz);
> -	printf("File size = %zu\n", test_data->input_data_sz);
> +	printf("Input data size = %zu\n", test_data->input_data_sz);
> 
>  	test_data->cleanup = ST_DURING_TEST;
>  	total_nb_qps = nb_compressdevs * test_data->nb_qps; @@ -390,6
> +390,8 @@ main(int argc, char **argv)
>  		i++;
>  	}
> 
> +	print_test_dynamics(); /* constructors must be executed first */
> +
>  	while (test_data->level <= test_data->level_lst.max) {
> 
>  		i = 0;
> --
> 2.17.1
  
Fiona Trahe July 16, 2019, 1:49 p.m. UTC | #3
Hi Artur, Shally,

> -----Original Message-----
> From: Trybula, ArturX
> Sent: Monday, July 15, 2019 3:01 PM
> To: Shally Verma <shallyv@marvell.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
> Dybkowski, AdamX <adamx.dybkowski@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header improvement
> 
> -----Original Message-----
> From: Shally Verma [mailto:shallyv@marvell.com]
> Sent: Monday, July 15, 2019 14:47
> To: Trybula, ArturX <arturx.trybula@intel.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
> Dybkowski, AdamX <adamx.dybkowski@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header improvement
> 
> 
> 
> > -----Original Message-----
> > From: Artur Trybula <arturx.trybula@intel.com>
> > Sent: Friday, July 12, 2019 4:13 PM
> > To: dev@dpdk.org; fiona.trahe@intel.com; Shally Verma
> > <shallyv@marvell.com>; adamx.dybkowski@intel.com;
> > arturx.trybula@intel.com; akhil.goyal@nxp.com
> > Subject: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header
> > improvement
> >
> > External Email
> >
> > ----------------------------------------------------------------------
> > This patch adds extra features to the compress performance test. Some
> > important parameters (memory allocation, number of ops, number of
> > segments) are calculated and printed out on the screen.
> > Information about compression threads is also provided.
> >
> > Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> > ---
> >  .../comp_perf_test_benchmark.c                | 21 ++++-
> >  .../comp_perf_test_common.c                   | 94 ++++++++++++++++++-
> >  .../comp_perf_test_common.h                   |  6 ++
> >  app/test-compress-perf/main.c                 |  4 +-
> >  4 files changed, 120 insertions(+), 5 deletions(-)
> >
> > diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c
> > b/app/test-compress-perf/comp_perf_test_benchmark.c
> > index aa1f8eea2..887459449 100644
> > --- a/app/test-compress-perf/comp_perf_test_benchmark.c
> > +++ b/app/test-compress-perf/comp_perf_test_benchmark.c
> > @@ -329,9 +329,26 @@ cperf_benchmark_test_runner(void *test_ctx)
> >  	struct comp_test_data *test_data = ctx->ver.options;
> >  	uint32_t lcore = rte_lcore_id();
> >  	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> > +	int i, ret = EXIT_SUCCESS;
> >
> >  	ctx->ver.mem.lcore_id = lcore;
> > -	int i, ret = EXIT_SUCCESS;
> > +
> > +	/*
> > +	 * printing information about current compression thread
> > +	 */
> > +	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> Question: runner() will be executed by each core so is ctx is thread local variable or global.? If it can be
> made thread local, then there would be no need of atomic here?
> [Artur] Runners are restarted for each compression level. It's enough to have the data printed only
> once.
[Fiona] added clarification - we originally did as you suggest, but in cases where there are multiple params, e.g. level 1-9, the thread is started/stopped for each level, so the header was printing out many times - always with the same data. So we needed the atomic to resolve.

> 
> > +		printf("    lcore: %u,"
> > +				" driver name: %s,"
> > +				" device name: %s,"
> > +				" device id: %u,"
> > +				" socket id: %u,"
> > +				" queue pair id: %u\n",
> > +			lcore,
> > +			ctx->ver.options->driver_name,
> > +			rte_compressdev_name_get(ctx->ver.mem.dev_id),
> > +			ctx->ver.mem.dev_id,
> > +			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
> > +			ctx->ver.mem.qp_id);
> >
> >  	/*
> >  	 * First the verification part is needed @@ -374,7 +391,7 @@
> > cperf_benchmark_test_runner(void *test_ctx)
> >  			1000000000;
> >
> >  	if (rte_atomic16_test_and_set(&display_once)) {
> > -		printf("%12s%6s%12s%17s%15s%16s\n",
> > +		printf("\n%12s%6s%12s%17s%15s%16s\n",
> >  			"lcore id", "Level", "Comp size", "Comp ratio [%]",
> >  			"Comp [Gbps]", "Decomp [Gbps]");
> >  	}
> > diff --git a/app/test-compress-perf/comp_perf_test_common.c
> > b/app/test- compress-perf/comp_perf_test_common.c
> > index 472c76686..3dc9349b0 100644
> > --- a/app/test-compress-perf/comp_perf_test_common.c
> > +++ b/app/test-compress-perf/comp_perf_test_common.c
> > @@ -16,6 +16,18 @@
> >
> >  #define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >
> > +struct performance_tests_results {
> > +	uint16_t total_segments;
> > +	uint16_t segment_sz;
> > +	uint16_t last_segment_sz;
> > +	uint32_t total_buffs;	      /*number of buffers = number of ops*/
> > +	uint16_t segments_per_buff;
> > +	uint16_t segments_per_last_buff;
> > +	size_t input_data_sz;
> > +};
> These looks more like test configuration than result. If you agree, then can rename it to test_config ..
> [Artur] These are exactly tests results, internal variables used by the algorithm. A kind of snapshot.
> Config by definition is used for configuration. In this case all the fields are only printed out.
[Fiona] I can see the confusion, it's not the input config - but it's also confusing to call it results, as that implies the throughput and ratio numbers. How about cperf_buffer_info?
> 
> > +
> > +static struct performance_tests_results tests_res;
> > +
> >  int
> >  param_range_check(uint16_t size, const struct rte_param_log2_range
> > *range)  { @@ -170,6 +182,13 @@ comp_perf_allocate_memory(struct
> > comp_test_data *test_data,
> >  				" could not be allocated\n");
> >  		return -1;
> >  	}
> > +
> > +	tests_res.total_segments = total_segs;
> > +	tests_res.segment_sz = test_data->seg_sz;
> > +	tests_res.total_buffs = mem->total_bufs;
> > +	tests_res.segments_per_buff = test_data->max_sgl_segs;
> > +	tests_res.input_data_sz = test_data->input_data_sz;
> > +
> >  	return 0;
> >  }
> >
> > @@ -178,9 +197,10 @@ prepare_bufs(struct comp_test_data *test_data,
> > struct cperf_mem_resources *mem)  {
> >  	uint32_t remaining_data = test_data->input_data_sz;
> >  	uint8_t *input_data_ptr = test_data->input_data;
> > -	size_t data_sz;
> > +	size_t data_sz = 0;
> >  	uint8_t *data_addr;
> >  	uint32_t i, j;
> > +	uint16_t segs_per_mbuf = 0;
> >
> Minimum segment per mbuf is 1. Then why don’t we initialize it to 1 here?
> [Artur] You are right Shally, if everything is fine with the input params and with the algorithm, but it
> should be left 0 in case of problems with mbufs allocation (current implementation).
> 
> 
> >  	for (i = 0; i < mem->total_bufs; i++) {
> >  		/* Allocate data in input mbuf and copy data from input file */ @@
> > -204,7 +224,7 @@ prepare_bufs(struct comp_test_data *test_data, struct
> > cperf_mem_resources *mem)
> >  		remaining_data -= data_sz;
> >
> >  		/* Already one segment in the mbuf */
> > -		uint16_t segs_per_mbuf = 1;
> > +		segs_per_mbuf = 1;
> >
> >  		/* Chain mbufs if needed for input mbufs */
> >  		while (segs_per_mbuf < test_data->max_sgl_segs @@ -
> > 281,5 +301,75 @@ prepare_bufs(struct comp_test_data *test_data, struct
> > cperf_mem_resources *mem)
> >  		}
> >  	}
> >
> > +	tests_res.segments_per_last_buff = segs_per_mbuf;
> > +	tests_res.last_segment_sz = data_sz;
> > +
> >  	return 0;
> >  }
> > +
> > +void
> > +print_test_dynamics(void)
> > +{
> > +	uint32_t opt_total_segs = DIV_CEIL(tests_res.input_data_sz,
> > +			MAX_SEG_SIZE);
> > +
> > +	if (tests_res.total_buffs > 1) {
> > +		printf("\nWarning: for the current input parameters number"
> A comma after 'input parameters' would improve readability here
> [Artur] - Ok
> 
> > +				" of ops is higher than one, which may result"
> > +				" in sub-optimal performance.\n");
> > +		printf("To improve the performance (for the current"
> > +				" input data) following parameters are"
> > +				" suggested:\n");
> > +		printf("	• Segment size: %d\n", MAX_SEG_SIZE);
> > +		printf("	• Number of segments: %u\n", opt_total_segs);
> > +	} else if (tests_res.total_buffs == 1) {
> > +		printf("\nWarning: there is only one op with %u segments –"
> May "Warning:" be replaced with "Info: " here.
> [Artur] Ok
> 
> > +				" the compression ratio is the best.\n",
> > +			tests_res.segments_per_last_buff);
> > +		if (tests_res.segment_sz < MAX_SEG_SIZE)
> > +			printf("To reduce compression time, please use"
> > +					" bigger segment size: %d.\n",
> > +				MAX_SEG_SIZE);
> > +		else if (tests_res.segment_sz == MAX_SEG_SIZE)
> > +			printf("Segment size is optimal for the best"
> > +					" performance.\n");
> > +	} else
> > +		printf("Warning: something wrong happened!!\n");
> > +
> > +	printf("\nFor the current input parameters (segment size = %u,"
> > +			" segments number = %u):\n",
> > +		tests_res.segment_sz,
> > +		tests_res.segments_per_buff);
> > +	printf("	• Total number of buffers: %d\n",
> > +		tests_res.total_segments);
> > +	printf("	• %u buffer(s) %u bytes long, last buffer %u"
> > +			" byte(s) long\n",
> > +		tests_res.total_segments - 1,
> > +		tests_res.segment_sz,
> > +		tests_res.last_segment_sz);
> > +	printf("	• Number of ops: %u\n", tests_res.total_buffs);
> > +	printf("	• Total memory allocation: %u\n",
> > +		(tests_res.total_segments - 1) * tests_res.segment_sz
> > +		+ tests_res.last_segment_sz);
> > +	if (tests_res.total_buffs > 1)
> > +		printf("	• %u ops: %u segment(s) in each,"
> > +				" segment size %u\n",
> > +			tests_res.total_buffs - 1,
> > +			tests_res.segments_per_buff,
> > +			tests_res.segment_sz);
> > +	if (tests_res.segments_per_last_buff > 1) {
> > +		printf("	• 1 op %u segments:\n",
> > +				tests_res.segments_per_last_buff);
> > +		printf("		o %u segment size %u\n",
> > +			tests_res.segments_per_last_buff - 1,
> > +			tests_res.segment_sz);
> > +		printf("		o last segment size %u\n",
> > +			tests_res.last_segment_sz);
> > +	} else if (tests_res.segments_per_last_buff == 1) {
> > +		printf("	• 1 op (the last one): %u segment %u"
> > +				" byte(s) long\n\n",
> > +			tests_res.segments_per_last_buff,
> > +			tests_res.last_segment_sz);
> > +	}
> Probably this if and else if here can be replaced by just 1 statement.
> [Artur] I think it's ok. This version is clear. If you like I can remove {} brackets from the "else if".
[Fiona] I think it's ok as is - there are different output whether >1 or ==1, so better to leave as separate paths.
> 
> 
> > +	printf("\n");
> > +}
> > diff --git a/app/test-compress-perf/comp_perf_test_common.h
> > b/app/test- compress-perf/comp_perf_test_common.h
> > index 9c11e3a00..c9e0c9081 100644
> > --- a/app/test-compress-perf/comp_perf_test_common.h
> > +++ b/app/test-compress-perf/comp_perf_test_common.h
> > @@ -13,6 +13,9 @@ struct cperf_mem_resources {
> >  	uint8_t dev_id;
> >  	uint16_t qp_id;
> >  	uint8_t lcore_id;
> > +
> > +	rte_atomic16_t print_info_once;
> > +
> >  	uint32_t total_bufs;
> >  	uint8_t *compressed_data;
> >  	uint8_t *decompressed_data;
> > @@ -38,4 +41,7 @@ comp_perf_allocate_memory(struct comp_test_data
> > *test_data,  int  prepare_bufs(struct comp_test_data *test_data,
> > struct
> 
> > cperf_mem_resources *mem);
> >
> > +void
> > +print_test_dynamics(void);
> > +
> >  #endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git
> > a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index
> > e746e4708..e7ac412e6 100644
> > --- a/app/test-compress-perf/main.c
> > +++ b/app/test-compress-perf/main.c
> > @@ -363,7 +363,7 @@ main(int argc, char **argv)
> >
> >  	printf("App uses socket: %u\n", rte_socket_id());
> >  	printf("Burst size = %u\n", test_data->burst_sz);
> > -	printf("File size = %zu\n", test_data->input_data_sz);
> > +	printf("Input data size = %zu\n", test_data->input_data_sz);
> >
> >  	test_data->cleanup = ST_DURING_TEST;
> >  	total_nb_qps = nb_compressdevs * test_data->nb_qps; @@ -390,6
> > +390,8 @@ main(int argc, char **argv)
> >  		i++;
> >  	}
> >
> > +	print_test_dynamics(); /* constructors must be executed first */
> > +
> >  	while (test_data->level <= test_data->level_lst.max) {
> >
> >  		i = 0;
> > --
> > 2.17.1
  
Shally Verma July 17, 2019, 7:21 a.m. UTC | #4
> -----Original Message-----
> From: Trahe, Fiona <fiona.trahe@intel.com>
> Sent: Tuesday, July 16, 2019 7:20 PM
> To: Trybula, ArturX <arturx.trybula@intel.com>; Shally Verma
> <shallyv@marvell.com>; dev@dpdk.org; Dybkowski, AdamX
> <adamx.dybkowski@intel.com>; akhil.goyal@nxp.com
> Cc: Trahe, Fiona <fiona.trahe@intel.com>
> Subject: RE: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header
> improvement
> 
....

> >
> > > -----Original Message-----
> > > From: Artur Trybula <arturx.trybula@intel.com>
> > > Sent: Friday, July 12, 2019 4:13 PM
> > > To: dev@dpdk.org; fiona.trahe@intel.com; Shally Verma
> > > <shallyv@marvell.com>; adamx.dybkowski@intel.com;
> > > arturx.trybula@intel.com; akhil.goyal@nxp.com
> > > Subject: [EXT] [PATCH v4 1/1] app/test-compress-perf: report header
> > > improvement
> > >
> > > External Email
> > >
> > > --------------------------------------------------------------------
> > > -- This patch adds extra features to the compress performance test.
> > > Some important parameters (memory allocation, number of ops, number
> > > of
> > > segments) are calculated and printed out on the screen.
> > > Information about compression threads is also provided.
> > >
> > > Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> > > ---
> > >  .../comp_perf_test_benchmark.c                | 21 ++++-
> > >  .../comp_perf_test_common.c                   | 94 ++++++++++++++++++-
> > >  .../comp_perf_test_common.h                   |  6 ++
> > >  app/test-compress-perf/main.c                 |  4 +-
> > >  4 files changed, 120 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c
> > > b/app/test-compress-perf/comp_perf_test_benchmark.c
> > > index aa1f8eea2..887459449 100644
> > > --- a/app/test-compress-perf/comp_perf_test_benchmark.c
> > > +++ b/app/test-compress-perf/comp_perf_test_benchmark.c
> > > @@ -329,9 +329,26 @@ cperf_benchmark_test_runner(void *test_ctx)
> > >  	struct comp_test_data *test_data = ctx->ver.options;
> > >  	uint32_t lcore = rte_lcore_id();
> > >  	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
> > > +	int i, ret = EXIT_SUCCESS;
> > >
> > >  	ctx->ver.mem.lcore_id = lcore;
> > > -	int i, ret = EXIT_SUCCESS;
> > > +
> > > +	/*
> > > +	 * printing information about current compression thread
> > > +	 */
> > > +	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
> > Question: runner() will be executed by each core so is ctx is thread
> > local variable or global.? If it can be made thread local, then there would be
> no need of atomic here?
> > [Artur] Runners are restarted for each compression level. It's enough
> > to have the data printed only once.
> [Fiona] added clarification - we originally did as you suggest, but in cases
> where there are multiple params, e.g. level 1-9, the thread is
> started/stopped for each level, so the header was printing out many times -
> always with the same data. So we needed the atomic to resolve.
> 
[Shally] Okay. 

> >
....

> > > diff --git a/app/test-compress-perf/comp_perf_test_common.c
> > > b/app/test- compress-perf/comp_perf_test_common.c
> > > index 472c76686..3dc9349b0 100644
> > > --- a/app/test-compress-perf/comp_perf_test_common.c
> > > +++ b/app/test-compress-perf/comp_perf_test_common.c
> > > @@ -16,6 +16,18 @@
> > >
> > >  #define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> > >
> > > +struct performance_tests_results {
> > > +	uint16_t total_segments;
> > > +	uint16_t segment_sz;
> > > +	uint16_t last_segment_sz;
> > > +	uint32_t total_buffs;	      /*number of buffers = number of ops*/
> > > +	uint16_t segments_per_buff;
> > > +	uint16_t segments_per_last_buff;
> > > +	size_t input_data_sz;
> > > +};
> > These looks more like test configuration than result. If you agree, then can
> rename it to test_config ..
> > [Artur] These are exactly tests results, internal variables used by the
> algorithm. A kind of snapshot.
> > Config by definition is used for configuration. In this case all the fields are
> only printed out.
> [Fiona] I can see the confusion, it's not the input config - but it's also
> confusing to call it results, as that implies the throughput and ratio numbers.
> How about cperf_buffer_info?
[Shally] this sound better.

> >
...

> > > +	if (tests_res.segments_per_last_buff > 1) {
> > > +		printf("	• 1 op %u segments:\n",
> > > +				tests_res.segments_per_last_buff);
> > > +		printf("		o %u segment size %u\n",
> > > +			tests_res.segments_per_last_buff - 1,
> > > +			tests_res.segment_sz);
> > > +		printf("		o last segment size %u\n",
> > > +			tests_res.last_segment_sz);
> > > +	} else if (tests_res.segments_per_last_buff == 1) {
> > > +		printf("	• 1 op (the last one): %u segment %u"
> > > +				" byte(s) long\n\n",
> > > +			tests_res.segments_per_last_buff,
> > > +			tests_res.last_segment_sz);
> > > +	}
> > Probably this if and else if here can be replaced by just 1 statement.
> > [Artur] I think it's ok. This version is clear. If you like I can remove {}
> brackets from the "else if".
> [Fiona] I think it's ok as is - there are different output whether >1 or ==1, so
> better to leave as separate paths.
[Shally] Okay. That was just my 2 cents. Am fine it "as is".

> >
..
> > > 2.17.1
  

Patch

diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c
index aa1f8eea2..887459449 100644
--- a/app/test-compress-perf/comp_perf_test_benchmark.c
+++ b/app/test-compress-perf/comp_perf_test_benchmark.c
@@ -329,9 +329,26 @@  cperf_benchmark_test_runner(void *test_ctx)
 	struct comp_test_data *test_data = ctx->ver.options;
 	uint32_t lcore = rte_lcore_id();
 	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	int i, ret = EXIT_SUCCESS;
 
 	ctx->ver.mem.lcore_id = lcore;
-	int i, ret = EXIT_SUCCESS;
+
+	/*
+	 * printing information about current compression thread
+	 */
+	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+		printf("    lcore: %u,"
+				" driver name: %s,"
+				" device name: %s,"
+				" device id: %u,"
+				" socket id: %u,"
+				" queue pair id: %u\n",
+			lcore,
+			ctx->ver.options->driver_name,
+			rte_compressdev_name_get(ctx->ver.mem.dev_id),
+			ctx->ver.mem.dev_id,
+			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+			ctx->ver.mem.qp_id);
 
 	/*
 	 * First the verification part is needed
@@ -374,7 +391,7 @@  cperf_benchmark_test_runner(void *test_ctx)
 			1000000000;
 
 	if (rte_atomic16_test_and_set(&display_once)) {
-		printf("%12s%6s%12s%17s%15s%16s\n",
+		printf("\n%12s%6s%12s%17s%15s%16s\n",
 			"lcore id", "Level", "Comp size", "Comp ratio [%]",
 			"Comp [Gbps]", "Decomp [Gbps]");
 	}
diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c
index 472c76686..3dc9349b0 100644
--- a/app/test-compress-perf/comp_perf_test_common.c
+++ b/app/test-compress-perf/comp_perf_test_common.c
@@ -16,6 +16,18 @@ 
 
 #define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
 
+struct performance_tests_results {
+	uint16_t total_segments;
+	uint16_t segment_sz;
+	uint16_t last_segment_sz;
+	uint32_t total_buffs;	      /*number of buffers = number of ops*/
+	uint16_t segments_per_buff;
+	uint16_t segments_per_last_buff;
+	size_t input_data_sz;
+};
+
+static struct performance_tests_results tests_res;
+
 int
 param_range_check(uint16_t size, const struct rte_param_log2_range *range)
 {
@@ -170,6 +182,13 @@  comp_perf_allocate_memory(struct comp_test_data *test_data,
 				" could not be allocated\n");
 		return -1;
 	}
+
+	tests_res.total_segments = total_segs;
+	tests_res.segment_sz = test_data->seg_sz;
+	tests_res.total_buffs = mem->total_bufs;
+	tests_res.segments_per_buff = test_data->max_sgl_segs;
+	tests_res.input_data_sz = test_data->input_data_sz;
+
 	return 0;
 }
 
@@ -178,9 +197,10 @@  prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
 {
 	uint32_t remaining_data = test_data->input_data_sz;
 	uint8_t *input_data_ptr = test_data->input_data;
-	size_t data_sz;
+	size_t data_sz = 0;
 	uint8_t *data_addr;
 	uint32_t i, j;
+	uint16_t segs_per_mbuf = 0;
 
 	for (i = 0; i < mem->total_bufs; i++) {
 		/* Allocate data in input mbuf and copy data from input file */
@@ -204,7 +224,7 @@  prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
 		remaining_data -= data_sz;
 
 		/* Already one segment in the mbuf */
-		uint16_t segs_per_mbuf = 1;
+		segs_per_mbuf = 1;
 
 		/* Chain mbufs if needed for input mbufs */
 		while (segs_per_mbuf < test_data->max_sgl_segs
@@ -281,5 +301,75 @@  prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
 		}
 	}
 
+	tests_res.segments_per_last_buff = segs_per_mbuf;
+	tests_res.last_segment_sz = data_sz;
+
 	return 0;
 }
+
+void
+print_test_dynamics(void)
+{
+	uint32_t opt_total_segs = DIV_CEIL(tests_res.input_data_sz,
+			MAX_SEG_SIZE);
+
+	if (tests_res.total_buffs > 1) {
+		printf("\nWarning: for the current input parameters number"
+				" of ops is higher than one, which may result"
+				" in sub-optimal performance.\n");
+		printf("To improve the performance (for the current"
+				" input data) following parameters are"
+				" suggested:\n");
+		printf("	• Segment size: %d\n", MAX_SEG_SIZE);
+		printf("	• Number of segments: %u\n", opt_total_segs);
+	} else if (tests_res.total_buffs == 1) {
+		printf("\nWarning: there is only one op with %u segments –"
+				" the compression ratio is the best.\n",
+			tests_res.segments_per_last_buff);
+		if (tests_res.segment_sz < MAX_SEG_SIZE)
+			printf("To reduce compression time, please use"
+					" bigger segment size: %d.\n",
+				MAX_SEG_SIZE);
+		else if (tests_res.segment_sz == MAX_SEG_SIZE)
+			printf("Segment size is optimal for the best"
+					" performance.\n");
+	} else
+		printf("Warning: something wrong happened!!\n");
+
+	printf("\nFor the current input parameters (segment size = %u,"
+			" segments number = %u):\n",
+		tests_res.segment_sz,
+		tests_res.segments_per_buff);
+	printf("	• Total number of buffers: %d\n",
+		tests_res.total_segments);
+	printf("	• %u buffer(s) %u bytes long, last buffer %u"
+			" byte(s) long\n",
+		tests_res.total_segments - 1,
+		tests_res.segment_sz,
+		tests_res.last_segment_sz);
+	printf("	• Number of ops: %u\n", tests_res.total_buffs);
+	printf("	• Total memory allocation: %u\n",
+		(tests_res.total_segments - 1) * tests_res.segment_sz
+		+ tests_res.last_segment_sz);
+	if (tests_res.total_buffs > 1)
+		printf("	• %u ops: %u segment(s) in each,"
+				" segment size %u\n",
+			tests_res.total_buffs - 1,
+			tests_res.segments_per_buff,
+			tests_res.segment_sz);
+	if (tests_res.segments_per_last_buff > 1) {
+		printf("	• 1 op %u segments:\n",
+				tests_res.segments_per_last_buff);
+		printf("		o %u segment size %u\n",
+			tests_res.segments_per_last_buff - 1,
+			tests_res.segment_sz);
+		printf("		o last segment size %u\n",
+			tests_res.last_segment_sz);
+	} else if (tests_res.segments_per_last_buff == 1) {
+		printf("	• 1 op (the last one): %u segment %u"
+				" byte(s) long\n\n",
+			tests_res.segments_per_last_buff,
+			tests_res.last_segment_sz);
+	}
+	printf("\n");
+}
diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
index 9c11e3a00..c9e0c9081 100644
--- a/app/test-compress-perf/comp_perf_test_common.h
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -13,6 +13,9 @@  struct cperf_mem_resources {
 	uint8_t dev_id;
 	uint16_t qp_id;
 	uint8_t lcore_id;
+
+	rte_atomic16_t print_info_once;
+
 	uint32_t total_bufs;
 	uint8_t *compressed_data;
 	uint8_t *decompressed_data;
@@ -38,4 +41,7 @@  comp_perf_allocate_memory(struct comp_test_data *test_data,
 int
 prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
 
+void
+print_test_dynamics(void);
+
 #endif /* _COMP_PERF_TEST_COMMON_H_ */
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index e746e4708..e7ac412e6 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -363,7 +363,7 @@  main(int argc, char **argv)
 
 	printf("App uses socket: %u\n", rte_socket_id());
 	printf("Burst size = %u\n", test_data->burst_sz);
-	printf("File size = %zu\n", test_data->input_data_sz);
+	printf("Input data size = %zu\n", test_data->input_data_sz);
 
 	test_data->cleanup = ST_DURING_TEST;
 	total_nb_qps = nb_compressdevs * test_data->nb_qps;
@@ -390,6 +390,8 @@  main(int argc, char **argv)
 		i++;
 	}
 
+	print_test_dynamics(); /* constructors must be executed first */
+
 	while (test_data->level <= test_data->level_lst.max) {
 
 		i = 0;