[v5,5/5] test/thash: add performance tests for the Toeplitz hash
Checks
Commit Message
This patch adds performance tests for different implementations
of the Toeplitz hash function.
Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
app/test/meson.build | 2 +
app/test/test_thash_perf.c | 120 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 122 insertions(+)
create mode 100644 app/test/test_thash_perf.c
Comments
21/10/2021 20:54, Vladimir Medvedkin:
> This patch adds performance tests for different implementations
> of the Toeplitz hash function.
Please name them.
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
There is some garbage,
> @@ -320,6 +321,7 @@ perf_test_names = [
> 'hash_readwrite_lf_perf_autotest',
> 'trace_perf_autotest',
> 'ipsec_perf_autotest',
> + 'thash_perf_autotest',
here (tabs instead of spaces)
> driver_test_names = [
> diff --git a/app/test/test_thash_perf.c b/app/test/test_thash_perf.c
> new file mode 100644
> index 0000000..fb66e20
> --- /dev/null
> +++ b/app/test/test_thash_perf.c
> @@ -0,0 +1,120 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2021 Intel Corporation
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <math.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +#include <rte_random.h>
> +#include <rte_thash.h>
> +
> +#include "test.h"
> +
> +#define ITERATIONS (1 << 15)
> +#define BATCH_SZ (1 << 10)
> +
> +#define IPV4_2_TUPLE_LEN (8)
> +#define IPV4_4_TUPLE_LEN (12)
> +#define IPV6_2_TUPLE_LEN (32)
> +#define IPV6_4_TUPLE_LEN (36)
> +
> +
> +static uint8_t default_rss_key[] = {
> + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
> + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
> + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
> + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
> + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
> +};
> +
> +static void
> +run_thash_test(unsigned int tuple_len)
> +{
> + uint32_t *tuples[BATCH_SZ];
> + unsigned int i, j;
> + uint64_t start_tsc, end_tsc;
> + uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
> + volatile uint32_t hash = 0;
> + uint32_t bulk_hash[BATCH_SZ] = { 0 };
> +
> + for (i = 0; i < BATCH_SZ; i++) {
> + tuples[i] = rte_zmalloc(NULL, len, 0);
> + for (j = 0; j < len / sizeof(uint32_t); j++)
> + tuples[i][j] = rte_rand();
> + }
> +
> + start_tsc = rte_rdtsc_precise();
> + for (i = 0; i < ITERATIONS; i++) {
> + for (j = 0; j < BATCH_SZ; j++) {
> + hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
> + default_rss_key);
> + }
> + }
> + end_tsc = rte_rdtsc_precise();
> +
> + printf("Average rte_softrss() takes \t\t%.1f cycles for key len %d\n",
> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> + BATCH_SZ), len);
> +
> + start_tsc = rte_rdtsc_precise();
> + for (i = 0; i < ITERATIONS; i++) {
> + for (j = 0; j < BATCH_SZ; j++) {
> + hash ^= rte_softrss_be(tuples[j], len /
> + sizeof(uint32_t), default_rss_key);
> + }
> + }
> + end_tsc = rte_rdtsc_precise();
> +
> + printf("Average rte_softrss_be() takes \t\t%.1f cycles for key len %d\n",
> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> + BATCH_SZ), len);
The function could stop here (one function per type of implementation).
> +
> + if (!rte_thash_gfni_supported())
> + return;
> +
> + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
> +
> + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
> + RTE_DIM(default_rss_key));
> +
> + start_tsc = rte_rdtsc_precise();
> + for (i = 0; i < ITERATIONS; i++) {
> + for (j = 0; j < BATCH_SZ; j++)
> + hash ^= rte_thash_gfni(rss_key_matrixes,
> + (uint8_t *)tuples[j], len);
> + }
> + end_tsc = rte_rdtsc_precise();
> +
> + printf("Average rte_thash_gfni takes \t\t%.1f cycles for key len %d\n",
> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> + BATCH_SZ), len);
> +
> + start_tsc = rte_rdtsc_precise();
> + for (i = 0; i < ITERATIONS; i++)
> + rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
> + bulk_hash, BATCH_SZ);
> +
> + end_tsc = rte_rdtsc_precise();
> +
> + printf("Average rte_thash_gfni_x2 takes \t%.1f cycles for key len %d\n",
and here, the function name is not updated.
> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> + BATCH_SZ), len);
> +
useless blank line
> +}
On Thu, 21 Oct 2021 19:54:29 +0100
Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:
> +static uint8_t default_rss_key[] = {
Should this be const?
That way you can make sure API isn't modifying it.
Hi Thomas,
Thanks for the review, I'll address your comments in v6.
Please find my comment below
On 25/10/2021 19:02, Thomas Monjalon wrote:
> 21/10/2021 20:54, Vladimir Medvedkin:
>> This patch adds performance tests for different implementations
>> of the Toeplitz hash function.
>
> Please name them.
>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>
> There are some garbage,
>
>> @@ -320,6 +321,7 @@ perf_test_names = [
>> 'hash_readwrite_lf_perf_autotest',
>> 'trace_perf_autotest',
>> 'ipsec_perf_autotest',
>> + 'thash_perf_autotest',
>
> here (tabs instead of space)
>
>> driver_test_names = [
>> diff --git a/app/test/test_thash_perf.c b/app/test/test_thash_perf.c
>> new file mode 100644
>> index 0000000..fb66e20
>> --- /dev/null
>> +++ b/app/test/test_thash_perf.c
>> @@ -0,0 +1,120 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2021 Intel Corporation
>> + */
>> +
>> +#include <stdio.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <math.h>
>> +
>> +#include <rte_cycles.h>
>> +#include <rte_malloc.h>
>> +#include <rte_random.h>
>> +#include <rte_thash.h>
>> +
>> +#include "test.h"
>> +
>> +#define ITERATIONS (1 << 15)
>> +#define BATCH_SZ (1 << 10)
>> +
>> +#define IPV4_2_TUPLE_LEN (8)
>> +#define IPV4_4_TUPLE_LEN (12)
>> +#define IPV6_2_TUPLE_LEN (32)
>> +#define IPV6_4_TUPLE_LEN (36)
>> +
>> +
>> +static uint8_t default_rss_key[] = {
>> + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
>> + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
>> + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
>> + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
>> + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
>> +};
>> +
>> +static void
>> +run_thash_test(unsigned int tuple_len)
>> +{
>> + uint32_t *tuples[BATCH_SZ];
>> + unsigned int i, j;
>> + uint64_t start_tsc, end_tsc;
>> + uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
>> + volatile uint32_t hash = 0;
>> + uint32_t bulk_hash[BATCH_SZ] = { 0 };
>> +
>> + for (i = 0; i < BATCH_SZ; i++) {
>> + tuples[i] = rte_zmalloc(NULL, len, 0);
>> + for (j = 0; j < len / sizeof(uint32_t); j++)
>> + tuples[i][j] = rte_rand();
>> + }
>> +
>> + start_tsc = rte_rdtsc_precise();
>> + for (i = 0; i < ITERATIONS; i++) {
>> + for (j = 0; j < BATCH_SZ; j++) {
>> + hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
>> + default_rss_key);
>> + }
>> + }
>> + end_tsc = rte_rdtsc_precise();
>> +
>> + printf("Average rte_softrss() takes \t\t%.1f cycles for key len %d\n",
>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>> + BATCH_SZ), len);
>> +
>> + start_tsc = rte_rdtsc_precise();
>> + for (i = 0; i < ITERATIONS; i++) {
>> + for (j = 0; j < BATCH_SZ; j++) {
>> + hash ^= rte_softrss_be(tuples[j], len /
>> + sizeof(uint32_t), default_rss_key);
>> + }
>> + }
>> + end_tsc = rte_rdtsc_precise();
>> +
>> + printf("Average rte_softrss_be() takes \t\t%.1f cycles for key len %d\n",
>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>> + BATCH_SZ), len);
>
> The function could stop here (one function per type of implementation).
>
Could you please clarify what you mean?
The function stops here if the machine does not support GFNI, and this is
done intentionally. On a machine without GFNI it tests only the scalar
implementations for every given length.
>> +
>> + if (!rte_thash_gfni_supported())
>> + return;
>> +
>> + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
>> +
>> + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
>> + RTE_DIM(default_rss_key));
>> +
>> + start_tsc = rte_rdtsc_precise();
>> + for (i = 0; i < ITERATIONS; i++) {
>> + for (j = 0; j < BATCH_SZ; j++)
>> + hash ^= rte_thash_gfni(rss_key_matrixes,
>> + (uint8_t *)tuples[j], len);
>> + }
>> + end_tsc = rte_rdtsc_precise();
>> +
>> + printf("Average rte_thash_gfni takes \t\t%.1f cycles for key len %d\n",
>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>> + BATCH_SZ), len);
>> +
>> + start_tsc = rte_rdtsc_precise();
>> + for (i = 0; i < ITERATIONS; i++)
>> + rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
>> + bulk_hash, BATCH_SZ);
>> +
>> + end_tsc = rte_rdtsc_precise();
>> +
>> + printf("Average rte_thash_gfni_x2 takes \t%.1f cycles for key len %d\n",
>
> and here, the function name is not updated.
>
>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>> + BATCH_SZ), len);
>> +
>
> useless blank line
>
>> +}
>
>
>
Hi Stephen,
On 25/10/2021 19:27, Stephen Hemminger wrote:
> On Thu, 21 Oct 2021 19:54:29 +0100
> Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:
>
>> +static uint8_t default_rss_key[] = {
>
> Should this be const?
>
> That way you can make sure API isn't modifying it.
>
Thanks, I'll fix this in v6
26/10/2021 22:29, Medvedkin, Vladimir:
> Hi Thomas,
>
> Thanks for the review, I'll address your comments in v6.
> Please find my comment below
>
> On 25/10/2021 19:02, Thomas Monjalon wrote:
> > 21/10/2021 20:54, Vladimir Medvedkin:
> >> This patch adds performance tests for different implementations
> >> of the Toeplitz hash function.
> >
> > Please name them.
> >
> >> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> >
> > There are some garbage,
> >
> >> @@ -320,6 +321,7 @@ perf_test_names = [
> >> 'hash_readwrite_lf_perf_autotest',
> >> 'trace_perf_autotest',
> >> 'ipsec_perf_autotest',
> >> + 'thash_perf_autotest',
> >
> > here (tabs instead of space)
> >
> >> driver_test_names = [
> >> diff --git a/app/test/test_thash_perf.c b/app/test/test_thash_perf.c
> >> new file mode 100644
> >> index 0000000..fb66e20
> >> --- /dev/null
> >> +++ b/app/test/test_thash_perf.c
> >> @@ -0,0 +1,120 @@
> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> + * Copyright(c) 2021 Intel Corporation
> >> + */
> >> +
> >> +#include <stdio.h>
> >> +#include <stdint.h>
> >> +#include <stdlib.h>
> >> +#include <math.h>
> >> +
> >> +#include <rte_cycles.h>
> >> +#include <rte_malloc.h>
> >> +#include <rte_random.h>
> >> +#include <rte_thash.h>
> >> +
> >> +#include "test.h"
> >> +
> >> +#define ITERATIONS (1 << 15)
> >> +#define BATCH_SZ (1 << 10)
> >> +
> >> +#define IPV4_2_TUPLE_LEN (8)
> >> +#define IPV4_4_TUPLE_LEN (12)
> >> +#define IPV6_2_TUPLE_LEN (32)
> >> +#define IPV6_4_TUPLE_LEN (36)
> >> +
> >> +
> >> +static uint8_t default_rss_key[] = {
> >> + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
> >> + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
> >> + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
> >> + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
> >> + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
> >> +};
> >> +
> >> +static void
> >> +run_thash_test(unsigned int tuple_len)
> >> +{
> >> + uint32_t *tuples[BATCH_SZ];
> >> + unsigned int i, j;
> >> + uint64_t start_tsc, end_tsc;
> >> + uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
> >> + volatile uint32_t hash = 0;
> >> + uint32_t bulk_hash[BATCH_SZ] = { 0 };
> >> +
> >> + for (i = 0; i < BATCH_SZ; i++) {
> >> + tuples[i] = rte_zmalloc(NULL, len, 0);
> >> + for (j = 0; j < len / sizeof(uint32_t); j++)
> >> + tuples[i][j] = rte_rand();
> >> + }
> >> +
> >> + start_tsc = rte_rdtsc_precise();
> >> + for (i = 0; i < ITERATIONS; i++) {
> >> + for (j = 0; j < BATCH_SZ; j++) {
> >> + hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
> >> + default_rss_key);
> >> + }
> >> + }
> >> + end_tsc = rte_rdtsc_precise();
> >> +
> >> + printf("Average rte_softrss() takes \t\t%.1f cycles for key len %d\n",
> >> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> >> + BATCH_SZ), len);
> >> +
> >> + start_tsc = rte_rdtsc_precise();
> >> + for (i = 0; i < ITERATIONS; i++) {
> >> + for (j = 0; j < BATCH_SZ; j++) {
> >> + hash ^= rte_softrss_be(tuples[j], len /
> >> + sizeof(uint32_t), default_rss_key);
> >> + }
> >> + }
> >> + end_tsc = rte_rdtsc_precise();
> >> +
> >> + printf("Average rte_softrss_be() takes \t\t%.1f cycles for key len %d\n",
> >> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> >> + BATCH_SZ), len);
> >
> > The function could stop here (one function per type of implementation).
> >
>
> Could you please clarify what do you mean?
> The function stops here if the machine do not support GFNI, and this is
> done intentionally. On machine without GFNI it tests only scalar
> implementations for every given length.
No, I mean you can split it into smaller functions.
> >> +
> >> + if (!rte_thash_gfni_supported())
> >> + return;
> >> +
> >> + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
> >> +
> >> + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
> >> + RTE_DIM(default_rss_key));
> >> +
> >> + start_tsc = rte_rdtsc_precise();
> >> + for (i = 0; i < ITERATIONS; i++) {
> >> + for (j = 0; j < BATCH_SZ; j++)
> >> + hash ^= rte_thash_gfni(rss_key_matrixes,
> >> + (uint8_t *)tuples[j], len);
> >> + }
> >> + end_tsc = rte_rdtsc_precise();
> >> +
> >> + printf("Average rte_thash_gfni takes \t\t%.1f cycles for key len %d\n",
> >> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> >> + BATCH_SZ), len);
> >> +
> >> + start_tsc = rte_rdtsc_precise();
> >> + for (i = 0; i < ITERATIONS; i++)
> >> + rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
> >> + bulk_hash, BATCH_SZ);
> >> +
> >> + end_tsc = rte_rdtsc_precise();
> >> +
> >> + printf("Average rte_thash_gfni_x2 takes \t%.1f cycles for key len %d\n",
> >
> > and here, the function name is not updated.
> >
> >> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
> >> + BATCH_SZ), len);
> >> +
> >
> > useless blank line
> >
> >> +}
Hi Thomas,
On 27/10/2021 10:29, Thomas Monjalon wrote:
> 26/10/2021 22:29, Medvedkin, Vladimir:
>> Hi Thomas,
>>
>> Thanks for the review, I'll address your comments in v6.
>> Please find my comment below
>>
>> On 25/10/2021 19:02, Thomas Monjalon wrote:
>>> 21/10/2021 20:54, Vladimir Medvedkin:
>>>> This patch adds performance tests for different implementations
>>>> of the Toeplitz hash function.
>>>
>>> Please name them.
>>>
>>>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>>>
>>> There are some garbage,
>>>
>>>> @@ -320,6 +321,7 @@ perf_test_names = [
>>>> 'hash_readwrite_lf_perf_autotest',
>>>> 'trace_perf_autotest',
>>>> 'ipsec_perf_autotest',
>>>> + 'thash_perf_autotest',
>>>
>>> here (tabs instead of space)
>>>
>>>> driver_test_names = [
>>>> diff --git a/app/test/test_thash_perf.c b/app/test/test_thash_perf.c
>>>> new file mode 100644
>>>> index 0000000..fb66e20
>>>> --- /dev/null
>>>> +++ b/app/test/test_thash_perf.c
>>>> @@ -0,0 +1,120 @@
>>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>>> + * Copyright(c) 2021 Intel Corporation
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <stdint.h>
>>>> +#include <stdlib.h>
>>>> +#include <math.h>
>>>> +
>>>> +#include <rte_cycles.h>
>>>> +#include <rte_malloc.h>
>>>> +#include <rte_random.h>
>>>> +#include <rte_thash.h>
>>>> +
>>>> +#include "test.h"
>>>> +
>>>> +#define ITERATIONS (1 << 15)
>>>> +#define BATCH_SZ (1 << 10)
>>>> +
>>>> +#define IPV4_2_TUPLE_LEN (8)
>>>> +#define IPV4_4_TUPLE_LEN (12)
>>>> +#define IPV6_2_TUPLE_LEN (32)
>>>> +#define IPV6_4_TUPLE_LEN (36)
>>>> +
>>>> +
>>>> +static uint8_t default_rss_key[] = {
>>>> + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
>>>> + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
>>>> + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
>>>> + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
>>>> + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
>>>> +};
>>>> +
>>>> +static void
>>>> +run_thash_test(unsigned int tuple_len)
>>>> +{
>>>> + uint32_t *tuples[BATCH_SZ];
>>>> + unsigned int i, j;
>>>> + uint64_t start_tsc, end_tsc;
>>>> + uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
>>>> + volatile uint32_t hash = 0;
>>>> + uint32_t bulk_hash[BATCH_SZ] = { 0 };
>>>> +
>>>> + for (i = 0; i < BATCH_SZ; i++) {
>>>> + tuples[i] = rte_zmalloc(NULL, len, 0);
>>>> + for (j = 0; j < len / sizeof(uint32_t); j++)
>>>> + tuples[i][j] = rte_rand();
>>>> + }
>>>> +
>>>> + start_tsc = rte_rdtsc_precise();
>>>> + for (i = 0; i < ITERATIONS; i++) {
>>>> + for (j = 0; j < BATCH_SZ; j++) {
>>>> + hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
>>>> + default_rss_key);
>>>> + }
>>>> + }
>>>> + end_tsc = rte_rdtsc_precise();
>>>> +
>>>> + printf("Average rte_softrss() takes \t\t%.1f cycles for key len %d\n",
>>>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> + BATCH_SZ), len);
>>>> +
>>>> + start_tsc = rte_rdtsc_precise();
>>>> + for (i = 0; i < ITERATIONS; i++) {
>>>> + for (j = 0; j < BATCH_SZ; j++) {
>>>> + hash ^= rte_softrss_be(tuples[j], len /
>>>> + sizeof(uint32_t), default_rss_key);
>>>> + }
>>>> + }
>>>> + end_tsc = rte_rdtsc_precise();
>>>> +
>>>> + printf("Average rte_softrss_be() takes \t\t%.1f cycles for key len %d\n",
>>>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> + BATCH_SZ), len);
>>>
>>> The function could stop here (one function per type of implementation).
>>>
>>
>> Could you please clarify what do you mean?
>> The function stops here if the machine do not support GFNI, and this is
>> done intentionally. On machine without GFNI it tests only scalar
>> implementations for every given length.
>
> No I mean you can split in smaller functions.
>
Aha, I see, I'll send v7.
>>>> +
>>>> + if (!rte_thash_gfni_supported())
>>>> + return;
>>>> +
>>>> + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
>>>> +
>>>> + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
>>>> + RTE_DIM(default_rss_key));
>>>> +
>>>> + start_tsc = rte_rdtsc_precise();
>>>> + for (i = 0; i < ITERATIONS; i++) {
>>>> + for (j = 0; j < BATCH_SZ; j++)
>>>> + hash ^= rte_thash_gfni(rss_key_matrixes,
>>>> + (uint8_t *)tuples[j], len);
>>>> + }
>>>> + end_tsc = rte_rdtsc_precise();
>>>> +
>>>> + printf("Average rte_thash_gfni takes \t\t%.1f cycles for key len %d\n",
>>>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> + BATCH_SZ), len);
>>>> +
>>>> + start_tsc = rte_rdtsc_precise();
>>>> + for (i = 0; i < ITERATIONS; i++)
>>>> + rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
>>>> + bulk_hash, BATCH_SZ);
>>>> +
>>>> + end_tsc = rte_rdtsc_precise();
>>>> +
>>>> + printf("Average rte_thash_gfni_x2 takes \t%.1f cycles for key len %d\n",
>>>
>>> and here, the function name is not updated.
>>>
>>>> + (double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> + BATCH_SZ), len);
>>>> +
>>>
>>> useless blank line
>>>
>>>> +}
>
>
>
@@ -144,6 +144,7 @@ test_sources = files(
'test_table_tables.c',
'test_tailq.c',
'test_thash.c',
+ 'test_thash_perf.c',
'test_timer.c',
'test_timer_perf.c',
'test_timer_racecond.c',
@@ -320,6 +321,7 @@ perf_test_names = [
'hash_readwrite_lf_perf_autotest',
'trace_perf_autotest',
'ipsec_perf_autotest',
+ 'thash_perf_autotest',
]
driver_test_names = [
new file mode 100644
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_thash.h>
+
+#include "test.h"
+
+#define ITERATIONS (1 << 15) /* timed passes over the whole batch of tuples */
+#define BATCH_SZ (1 << 10) /* number of random tuples hashed in each pass */
+
+#define IPV4_2_TUPLE_LEN (8) /* tuple lengths in bytes, passed to run_thash_test() */
+#define IPV4_4_TUPLE_LEN (12) /* NOTE(review): presumably the 2-tuple plus 4 bytes of ports; confirm */
+#define IPV6_2_TUPLE_LEN (32)
+#define IPV6_4_TUPLE_LEN (36)
+
+
+/*
+ * 40-byte RSS key handed to every hash implementation exercised by this
+ * test. Nothing in this file writes to it, so it is const-qualified: the
+ * compiler will then reject any call that would need to modify the key.
+ */
+static const uint8_t default_rss_key[] = {
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};
+
+/* Release the first 'count' tuple buffers of 'tuples'. */
+static void
+free_tuples(uint32_t **tuples, unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		rte_free(tuples[i]);
+}
+
+/*
+ * Print the average cost of one hash computation.
+ * 'label' carries its own trailing tabs so that the columns line up.
+ */
+static void
+report_avg_cycles(const char *label, uint64_t cycles, unsigned int len)
+{
+	printf("Average %s%.1f cycles for key len %u\n", label,
+		(double)cycles / (double)(ITERATIONS * BATCH_SZ), len);
+}
+
+/*
+ * Measure the average number of TSC cycles spent per hash for each Toeplitz
+ * implementation: rte_softrss(), rte_softrss_be() and, when
+ * rte_thash_gfni_supported() reports support, rte_thash_gfni() and
+ * rte_thash_gfni_bulk().
+ *
+ * tuple_len: requested tuple length in bytes; it is rounded up to a multiple
+ * of sizeof(uint32_t) before use.
+ *
+ * Every implementation hashes the same BATCH_SZ random tuples ITERATIONS
+ * times. Results go to stdout; if a tuple buffer cannot be allocated a
+ * message is printed and nothing is measured.
+ */
+static void
+run_thash_test(unsigned int tuple_len)
+{
+	uint32_t *tuples[BATCH_SZ];
+	uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
+	unsigned int i, j;
+	uint64_t start_tsc, end_tsc;
+	uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
+	/* volatile sink: keeps the compiler from discarding the hash calls */
+	volatile uint32_t hash = 0;
+	uint32_t bulk_hash[BATCH_SZ] = { 0 };
+
+	for (i = 0; i < BATCH_SZ; i++) {
+		tuples[i] = rte_zmalloc(NULL, len, 0);
+		if (tuples[i] == NULL) {
+			printf("Cannot allocate memory for tuple %u\n", i);
+			/* only entries [0, i) were successfully allocated */
+			free_tuples(tuples, i);
+			return;
+		}
+		for (j = 0; j < len / sizeof(uint32_t); j++)
+			tuples[i][j] = rte_rand();
+	}
+
+	start_tsc = rte_rdtsc_precise();
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < BATCH_SZ; j++) {
+			hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
+				default_rss_key);
+		}
+	}
+	end_tsc = rte_rdtsc_precise();
+
+	report_avg_cycles("rte_softrss() takes \t\t", end_tsc - start_tsc, len);
+
+	start_tsc = rte_rdtsc_precise();
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < BATCH_SZ; j++) {
+			hash ^= rte_softrss_be(tuples[j], len /
+				sizeof(uint32_t), default_rss_key);
+		}
+	}
+	end_tsc = rte_rdtsc_precise();
+
+	report_avg_cycles("rte_softrss_be() takes \t\t", end_tsc - start_tsc,
+		len);
+
+	/* Without GFNI only the scalar implementations are measured. */
+	if (!rte_thash_gfni_supported())
+		goto out;
+
+	rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
+		RTE_DIM(default_rss_key));
+
+	start_tsc = rte_rdtsc_precise();
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < BATCH_SZ; j++)
+			hash ^= rte_thash_gfni(rss_key_matrixes,
+				(uint8_t *)tuples[j], len);
+	}
+	end_tsc = rte_rdtsc_precise();
+
+	report_avg_cycles("rte_thash_gfni takes \t\t", end_tsc - start_tsc,
+		len);
+
+	start_tsc = rte_rdtsc_precise();
+	for (i = 0; i < ITERATIONS; i++)
+		rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
+			bulk_hash, BATCH_SZ);
+	end_tsc = rte_rdtsc_precise();
+
+	report_avg_cycles("rte_thash_gfni_bulk takes \t", end_tsc - start_tsc,
+		len);
+
+out:
+	free_tuples(tuples, BATCH_SZ);
+}
+
+/*
+ * Test entry point: runs the hash benchmark once for each tuple length,
+ * in the order IPv4 2-tuple, IPv4 4-tuple, IPv6 2-tuple, IPv6 4-tuple.
+ * Always returns 0.
+ */
+static int
+test_thash_perf(void)
+{
+	static const unsigned int tuple_lens[] = {
+		IPV4_2_TUPLE_LEN,
+		IPV4_4_TUPLE_LEN,
+		IPV6_2_TUPLE_LEN,
+		IPV6_4_TUPLE_LEN,
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(tuple_lens) / sizeof(tuple_lens[0]); idx++)
+		run_thash_test(tuple_lens[idx]);
+
+	return 0;
+}
+
+REGISTER_TEST_COMMAND(thash_perf_autotest, test_thash_perf);