[2/5] hash: enable gfni thash implementation
Commit Message
This patch enables the new GFNI Toeplitz hash implementation
in the predictable RSS library.
Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
lib/hash/rte_thash.h | 19 +++++++++++++++++++
lib/hash/version.map | 1 +
3 files changed, 59 insertions(+), 4 deletions(-)
Comments
> This patch enables the new GFNI Toeplitz hash implementation
> in the predictable RSS library.
>
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
> lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
> lib/hash/rte_thash.h | 19 +++++++++++++++++++
> lib/hash/version.map | 1 +
> 3 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
> index 07447f7..86a0e96 100644
> --- a/lib/hash/rte_thash.c
> +++ b/lib/hash/rte_thash.c
> @@ -86,6 +86,8 @@ struct rte_thash_ctx {
> uint32_t reta_sz_log; /** < size of the RSS ReTa in bits */
> uint32_t subtuples_nb; /** < number of subtuples */
> uint32_t flags;
> + uint64_t *matrices;
Please add a comment - what is that, what is it used for, etc.
> + /**< rte_thash_complete_matrix generated matrices */
> uint8_t hash_key[0];
> };
>
> @@ -253,12 +255,25 @@ rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> ctx->hash_key[i] = rte_rand();
> }
>
> + if (rte_thash_gfni_supported) {
I think it should be:
if (rte_thash_gfni_supported && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
> + ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
> + RTE_CACHE_LINE_SIZE);
You could probably do this before allocating ctx, at the same place where te is allocated.
Might be a bit nicer.
> + if (ctx->matrices == NULL)
RTE_LOG(ERR, ...);
rte_errno = ENOMEM;
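I.e. something like (just a sketch, exact log message is up to you):

    if (ctx->matrices == NULL) {
        RTE_LOG(ERR, HASH,
            "Cannot allocate matrices for context %s\n", name);
        rte_errno = ENOMEM;
        goto free_ctx;
    }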
> + goto free_ctx;
> +
> + rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
> + key_len);
> + }
> +
> te->data = (void *)ctx;
> TAILQ_INSERT_TAIL(thash_list, te, next);
>
> rte_mcfg_tailq_write_unlock();
>
> return ctx;
> +
> +free_ctx:
> + rte_free(ctx);
> free_te:
> rte_free(te);
> exit:
> @@ -372,6 +387,10 @@ generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
> set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
> }
>
> + if (rte_thash_gfni_supported)
Here and in data-path functions, I think it would be better:
if (ctx->matrices != NULL)
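I.e. (sketch):

    if (ctx->matrices != NULL)
        rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
            ctx->key_len);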
> + rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
> + ctx->key_len);
> +
> return 0;
> }
>
> @@ -628,6 +647,16 @@ rte_thash_get_key(struct rte_thash_ctx *ctx)
> return ctx->hash_key;
> }
>
> +const uint64_t *
> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
> +{
> + if (rte_thash_gfni_supported)
> + return ctx->matrices;
Why not just always:
return ctx->matrices;
?
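I.e. the whole function would become just (sketch; a NULL return then
simply means no GFNI matrices were generated):

    const uint64_t *
    rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
    {
        return ctx->matrices;
    }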
> +
> + rte_errno = ENOTSUP;
> + return NULL;
> +}
> +
> static inline uint8_t
> read_unaligned_byte(uint8_t *ptr, unsigned int len, unsigned int offset)
> {
> @@ -739,11 +768,17 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
> attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));
>
> for (i = 0; i < attempts; i++) {
> - for (j = 0; j < (tuple_len / 4); j++)
> - tmp_tuple[j] =
> - rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
> + if (rte_thash_gfni_supported)
if (ctx->matrices)
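I.e. (sketch):

    if (ctx->matrices != NULL)
        hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
    else {
        /* scalar fallback, as below */
        ...
    }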
> + hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
> + else {
> + for (j = 0; j < (tuple_len / 4); j++)
> + tmp_tuple[j] =
> + rte_be_to_cpu_32(
> + *(uint32_t *)&tuple[j * 4]);
> +
> + hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
> + }
>
> - hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
> adj_bits = rte_thash_get_complement(h, hash, desired_value);
>
> /*
> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> index e3f1fc6..6e6861c 100644
> --- a/lib/hash/rte_thash.h
> +++ b/lib/hash/rte_thash.h
> @@ -410,6 +410,25 @@ const uint8_t *
> rte_thash_get_key(struct rte_thash_ctx *ctx);
>
> /**
> + * Get a pointer to the toeplitz hash matrices contained in the context.
> + * These matrices could be used with fast toeplitz hash implementation if
> + * CPU supports GFNI.
> + * Matrices changes after each addition of a helper.
> + *
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * @param ctx
> + * Thash context
> + * @return
> + * A pointer to the toeplitz hash key matrices on success
> + * NULL if GFNI is not supported.
> + */
> +__rte_experimental
> +const uint64_t *
> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx);
> +
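Might also be worth a short usage sketch in the doxygen, e.g.
(assuming rte_thash_gfni() as used in rte_thash.c above, for some
tuple/tuple_len):

    const uint64_t *mtrx = rte_thash_get_gfni_matrices(ctx);

    if (mtrx != NULL)
        hash = rte_thash_gfni(mtrx, tuple, tuple_len);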
> +/**
> * Function prototype for the rte_thash_adjust_tuple
> * to check if adjusted tuple could be used.
> * Generally it is some kind of lookup function to check
> diff --git a/lib/hash/version.map b/lib/hash/version.map
> index cecf922..3eda695 100644
> --- a/lib/hash/version.map
> +++ b/lib/hash/version.map
> @@ -43,6 +43,7 @@ EXPERIMENTAL {
> rte_thash_find_existing;
> rte_thash_free_ctx;
> rte_thash_get_complement;
> + rte_thash_get_gfni_matrices;
> rte_thash_get_helper;
> rte_thash_get_key;
> rte_thash_gfni_supported;
> --
> 2.7.4
Hi Konstantin,
On 08/10/2021 13:31, Ananyev, Konstantin wrote:
>
>> This patch enables the new GFNI Toeplitz hash implementation
>> in the predictable RSS library.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>> lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
>> lib/hash/rte_thash.h | 19 +++++++++++++++++++
>> lib/hash/version.map | 1 +
>> 3 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
>> index 07447f7..86a0e96 100644
>> --- a/lib/hash/rte_thash.c
>> +++ b/lib/hash/rte_thash.c
>> @@ -86,6 +86,8 @@ struct rte_thash_ctx {
>> uint32_t reta_sz_log; /** < size of the RSS ReTa in bits */
>> uint32_t subtuples_nb; /** < number of subtuples */
>> uint32_t flags;
>> + uint64_t *matrices;
>
> Please add a comment - what is that, what is it used for, etc.
>
I'll rephrase the comment below.
>> + /**< rte_thash_complete_matrix generated matrices */
>> uint8_t hash_key[0];
>> };
>>
>> @@ -253,12 +255,25 @@ rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
>> ctx->hash_key[i] = rte_rand();
>> }
>>
>> + if (rte_thash_gfni_supported) {
>
> I think it should be:
> if (rte_thash_gfni_supported && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
>
>
Agree
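So the condition will become something like (sketch, reusing
rte_vect_get_max_simd_bitwidth() from rte_vect.h as you suggested):

    if (rte_thash_gfni_supported &&
            rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) {
        /* allocate and fill ctx->matrices as below */
        ...
    }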
>> + ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
>> + RTE_CACHE_LINE_SIZE);
>
> You could probably do this before allocating ctx, at the same place where te is allocated.
> Might be a bit nicer.
>
I'd prefer to keep allocation and initialization of the matrices in one
place; rte_thash_complete_matrix() just below uses the previously
generated ctx->hash_key.
>> + if (ctx->matrices == NULL)
>
> RTE_LOG(ERR, ...);
> rte_errno = ENOMEM;
>
Agree
>> + goto free_ctx;
>> +
>> + rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
>> + key_len);
>> + }
>> +
>> te->data = (void *)ctx;
>> TAILQ_INSERT_TAIL(thash_list, te, next);
>>
>> rte_mcfg_tailq_write_unlock();
>>
>> return ctx;
>> +
>> +free_ctx:
>> + rte_free(ctx);
>> free_te:
>> rte_free(te);
>> exit:
>> @@ -372,6 +387,10 @@ generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
>> set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
>> }
>>
>> + if (rte_thash_gfni_supported)
>
> Here and in data-path functions, I think it would be better:
> if (ctx->matrices != NULL)
Agree
>> + rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
>> + ctx->key_len);
>> +
>> return 0;
>> }
>>
>> @@ -628,6 +647,16 @@ rte_thash_get_key(struct rte_thash_ctx *ctx)
>> return ctx->hash_key;
>> }
>>
>> +const uint64_t *
>> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
>> +{
>> + if (rte_thash_gfni_supported)
>> + return ctx->matrices;
>
> Why not just always:
> return ctx->matrices;
> ?
>
Agree
>> +
>> + rte_errno = ENOTSUP;
>> + return NULL;
>> +}