@@ -31,6 +31,7 @@
#define CPERF_AUTH_DIGEST_SZ ("auth-digest-sz")
#define CPERF_AUTH_AAD_SZ ("auth-aad-sz")
#define CPERF_CSV ("csv-friendly")
+#define CPERF_MINIMISE_OFFLOAD_COST ("minimise-offload-cost")
#define MAX_LIST 32
@@ -65,6 +66,7 @@ struct cperf_options {
uint32_t out_of_place:1;
uint32_t silent:1;
uint32_t csv:1;
+ uint32_t minimise_offload_cost:1;
enum rte_crypto_cipher_algorithm cipher_algo;
enum rte_crypto_cipher_operation cipher_op;
@@ -439,6 +439,15 @@ parse_silent(struct cperf_options *opts,
}
static int
+parse_minimise_offload_cost(struct cperf_options *opts,
+ const char *arg __rte_unused)
+{
+ opts->minimise_offload_cost = 1;
+
+ return 0;
+}
+
+static int
parse_cipher_algo(struct cperf_options *opts, const char *arg)
{
@@ -603,6 +612,7 @@ static struct option lgopts[] = {
{ CPERF_AUTH_DIGEST_SZ, required_argument, 0, 0 },
{ CPERF_AUTH_AAD_SZ, required_argument, 0, 0 },
{ CPERF_CSV, no_argument, 0, 0},
+ { CPERF_MINIMISE_OFFLOAD_COST, no_argument, 0, 0 },
{ NULL, 0, 0, 0 }
};
@@ -640,6 +650,7 @@ cperf_options_default(struct cperf_options *opts)
opts->sessionless = 0;
opts->out_of_place = 0;
opts->csv = 0;
+ opts->minimise_offload_cost = 0;
opts->cipher_algo = RTE_CRYPTO_CIPHER_AES_CBC;
opts->cipher_op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
@@ -681,6 +692,7 @@ cperf_opts_parse_long(int opt_idx, struct cperf_options *opts)
{ CPERF_AUTH_DIGEST_SZ, parse_auth_digest_sz },
{ CPERF_AUTH_AAD_SZ, parse_auth_aad_sz },
{ CPERF_CSV, parse_csv_friendly},
+ { CPERF_MINIMISE_OFFLOAD_COST, parse_minimise_offload_cost },
};
unsigned int i;
@@ -320,6 +320,9 @@ cperf_throughput_test_runner(void *test_ctx)
uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
uint64_t m_idx = 0, tsc_start, tsc_end, tsc_duration;
+ uint64_t wait_start = 0, wait_end = 0;
+ uint64_t wait_duration = 0;
+ uint32_t wait_cycles = 0;
uint16_t ops_unused = 0;
@@ -375,8 +378,13 @@ cperf_throughput_test_runner(void *test_ctx)
/* Enqueue burst of ops on crypto device */
ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
ops, burst_size);
- if (ops_enqd < burst_size)
+ if (ops_enqd < burst_size) {
ops_enqd_failed++;
+ wait_cycles += 10;
+ } else {
+ if (wait_cycles)
+ wait_cycles--;
+ }
/**
* Calculate number of ops not enqueued (mainly for hw
@@ -385,6 +393,26 @@ cperf_throughput_test_runner(void *test_ctx)
ops_unused = burst_size - ops_enqd;
ops_enqd_total += ops_enqd;
+ /**
+ * Minimum offload cost will be achieved when the
+ * specified burst_size is enqueued. Rather than
+ * wasting CPU cycles continually retrying, with a
+ * fraction of the burst being enqueued each time,
+ * back off until a full burst can be enqueued.
+ * The cycles counted here represent cycles saved by
+ * offloading, which in a real application are
+ * available for other work. Hence these cycles are
+ * deducted from the total cycle-count to show the
+ * offload cost.
+ */
+ if (ctx->options->minimise_offload_cost &&
+ wait_cycles > 30) {
+ wait_start = wait_end = rte_rdtsc_precise();
+ while ((wait_end - wait_start) < wait_cycles)
+ wait_end = rte_rdtsc_precise();
+ wait_duration += (wait_end - wait_start);
+ }
+
/* Dequeue processed burst of ops from crypto device */
ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
@@ -400,13 +428,19 @@ cperf_throughput_test_runner(void *test_ctx)
rte_crypto_op_free(ops_processed[i]);
ops_deqd_total += ops_deqd;
- } else {
+ }
+
+ if (ops_deqd != test_burst_size) {
/**
- * Count dequeue polls which didn't return any
- * processed operations. This statistic is mainly
+ * Count dequeue polls which don't return a
+ * full burst. This statistic is mainly
* relevant to hw accelerators.
*/
ops_deqd_failed++;
+ wait_cycles += 10;
+ } else {
+ if (wait_cycles)
+ wait_cycles--;
}
m_idx += ops_needed;
@@ -415,8 +449,7 @@ cperf_throughput_test_runner(void *test_ctx)
}
/* Dequeue any operations still in the crypto device */
-
- while (ops_deqd_total < ctx->options->total_ops) {
+ while (ops_deqd_total < ops_enqd_total) {
/* Sending 0 length burst to flush sw crypto device */
rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
@@ -447,18 +480,22 @@ cperf_throughput_test_runner(void *test_ctx)
/* Calculate average cycles per packet */
double cycles_per_packet = ((double)tsc_duration /
ctx->options->total_ops);
+ double available_cycles_per_packet = ((double)wait_duration /
+ ctx->options->total_ops);
if (!ctx->options->csv) {
if (!only_once)
- printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
- "lcore id", "Buf Size", "Burst Size",
+ printf(
+ "%8s%10s%6s%12s%12s%12s%12s%8s%8s%18s%10s%12s\n\n",
+ "lcore id", "Buf Size", "Burst",
"Enqueued", "Dequeued", "Failed Enq",
"Failed Deq", "MOps", "Gbps",
- "Cycles/Buf");
+ "Cycles/Buf(Total)", "(Offload)",
+ "(Available)");
only_once = 1;
- printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
- "%12"PRIu64"%12.4f%12.4f%12.2f\n",
+ printf("%8u%10u%6u%12"PRIu64"%12"PRIu64"%12"PRIu64
+ "%12"PRIu64"%8.4f%8.4f%18.0f%10.0f%12.0f\n",
ctx->lcore_id,
ctx->options->test_buffer_size,
test_burst_size,
@@ -468,17 +505,22 @@ cperf_throughput_test_runner(void *test_ctx)
ops_deqd_failed,
ops_per_second/1000000,
throughput_gbps,
- cycles_per_packet);
+ cycles_per_packet,
+ cycles_per_packet
+ - available_cycles_per_packet,
+ available_cycles_per_packet);
+
} else {
if (!only_once)
printf("# lcore id, Buffer Size(B),"
"Burst Size,Enqueued,Dequeued,Failed Enq,"
"Failed Deq,Ops(Millions),Throughput(Gbps),"
- "Cycles/Buf\n\n");
+ "Cycles/Buf(Total),(Offload),"
+ "(Available)\n\n");
only_once = 1;
printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
- "%.f3;%.f3;%.f3\n",
+ "%.3f;%.3f;%.3f;%.3f;%.3f\n",
ctx->lcore_id,
ctx->options->test_buffer_size,
test_burst_size,
@@ -488,7 +530,11 @@ cperf_throughput_test_runner(void *test_ctx)
ops_deqd_failed,
ops_per_second/1000000,
throughput_gbps,
- cycles_per_packet);
+ cycles_per_packet,
+ cycles_per_packet
+ - available_cycles_per_packet,
+ available_cycles_per_packet);
+
}
/* Get next size from range or list */