[2/5] hash: enable gfni thash implementation

Message ID 1630944239-363648-3-git-send-email-vladimir.medvedkin@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series optimized Toeplitz hash implementation |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Vladimir Medvedkin Sept. 6, 2021, 4:03 p.m. UTC
  This patch enables the new GFNI Toeplitz hash in
the predictable RSS library.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
 lib/hash/rte_thash.h | 19 +++++++++++++++++++
 lib/hash/version.map |  1 +
 3 files changed, 59 insertions(+), 4 deletions(-)
  

Comments

Ananyev, Konstantin Oct. 8, 2021, 11:31 a.m. UTC | #1
> This patch enables new GFNI Toeplitz hash in
> predictable RSS library.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
>  lib/hash/rte_thash.h | 19 +++++++++++++++++++
>  lib/hash/version.map |  1 +
>  3 files changed, 59 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
> index 07447f7..86a0e96 100644
> --- a/lib/hash/rte_thash.c
> +++ b/lib/hash/rte_thash.c
> @@ -86,6 +86,8 @@ struct rte_thash_ctx {
>  	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
>  	uint32_t	subtuples_nb;	/** < number of subtuples */
>  	uint32_t	flags;
> +	uint64_t	*matrices;

Please add a comment explaining what this field is, etc.

> +	/**< rte_thash_complete_matrix generated matrices */
>  	uint8_t		hash_key[0];
>  };
> 
> @@ -253,12 +255,25 @@ rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
>  			ctx->hash_key[i] = rte_rand();
>  	}
> 
> +	if (rte_thash_gfni_supported) {

I think it should be:
if (rte_thash_gfni_supported && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)


> +		ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
> +			RTE_CACHE_LINE_SIZE);

You could probably do this before allocating ctx, at the same place where te is allocated.
Might be a bit nicer.

> +		if (ctx->matrices == NULL)

		RTE_LOG(ERR, ...);
		rte_errno = ENOMEM;

> +			goto free_ctx;
> +
> +		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
> +			key_len);
> +	}
> +
>  	te->data = (void *)ctx;
>  	TAILQ_INSERT_TAIL(thash_list, te, next);
> 
>  	rte_mcfg_tailq_write_unlock();
> 
>  	return ctx;
> +
> +free_ctx:
> +	rte_free(ctx);
>  free_te:
>  	rte_free(te);
>  exit:
> @@ -372,6 +387,10 @@ generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
>  			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
>  	}
> 
> +	if (rte_thash_gfni_supported)

Here and in data-path functions, I think it would be better:
if (ctx->matrices != NULL)
> +		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
> +			ctx->key_len);
> +
>  	return 0;
>  }
> 
> @@ -628,6 +647,16 @@ rte_thash_get_key(struct rte_thash_ctx *ctx)
>  	return ctx->hash_key;
>  }
> 
> +const uint64_t *
> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
> +{
> +	if (rte_thash_gfni_supported)
> +		return ctx->matrices;

Why not just always:
return ctx->matrices;
?

> +
> +	rte_errno = ENOTSUP;
> +	return NULL;
> +}
> +
>  static inline uint8_t
>  read_unaligned_byte(uint8_t *ptr, unsigned int len, unsigned int offset)
>  {
> @@ -739,11 +768,17 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
>  	attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));
> 
>  	for (i = 0; i < attempts; i++) {
> -		for (j = 0; j < (tuple_len / 4); j++)
> -			tmp_tuple[j] =
> -				rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
> +		if (rte_thash_gfni_supported)
if (ctx->matrices)

> +			hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
> +		else {
> +			for (j = 0; j < (tuple_len / 4); j++)
> +				tmp_tuple[j] =
> +					rte_be_to_cpu_32(
> +						*(uint32_t *)&tuple[j * 4]);
> +
> +			hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
> +		}
> 
> -		hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
>  		adj_bits = rte_thash_get_complement(h, hash, desired_value);
> 
>  		/*
> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> index e3f1fc6..6e6861c 100644
> --- a/lib/hash/rte_thash.h
> +++ b/lib/hash/rte_thash.h
> @@ -410,6 +410,25 @@ const uint8_t *
>  rte_thash_get_key(struct rte_thash_ctx *ctx);
> 
>  /**
> + * Get a pointer to the toeplitz hash matrices contained in the context.
> + * These matrices could be used with fast toeplitz hash implementation if
> + * CPU supports GFNI.
> + * Matrices changes after each addition of a helper.
> + *
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * @param ctx
> + *  Thash context
> + * @return
> + *  A pointer to the toeplitz hash key matrices on success
> + *  NULL if GFNI is not supported.
> + */
> +__rte_experimental
> +const uint64_t *
> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx);
> +
> +/**
>   * Function prototype for the rte_thash_adjust_tuple
>   * to check if adjusted tuple could be used.
>   * Generally it is some kind of lookup function to check
> diff --git a/lib/hash/version.map b/lib/hash/version.map
> index cecf922..3eda695 100644
> --- a/lib/hash/version.map
> +++ b/lib/hash/version.map
> @@ -43,6 +43,7 @@ EXPERIMENTAL {
>  	rte_thash_find_existing;
>  	rte_thash_free_ctx;
>  	rte_thash_get_complement;
> +	rte_thash_get_gfni_matrices;
>  	rte_thash_get_helper;
>  	rte_thash_get_key;
>  	rte_thash_gfni_supported;
> --
> 2.7.4
  
Vladimir Medvedkin Oct. 15, 2021, 9:13 a.m. UTC | #2
Hi Konstantin,

On 08/10/2021 13:31, Ananyev, Konstantin wrote:
> 
>> This patch enables new GFNI Toeplitz hash in
>> predictable RSS library.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>   lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
>>   lib/hash/rte_thash.h | 19 +++++++++++++++++++
>>   lib/hash/version.map |  1 +
>>   3 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
>> index 07447f7..86a0e96 100644
>> --- a/lib/hash/rte_thash.c
>> +++ b/lib/hash/rte_thash.c
>> @@ -86,6 +86,8 @@ struct rte_thash_ctx {
>>   	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
>>   	uint32_t	subtuples_nb;	/** < number of subtuples */
>>   	uint32_t	flags;
>> +	uint64_t	*matrices;
> 
> Comment, what is that, etc.
> 

I'll rephrase the comment below.

>> +	/**< rte_thash_complete_matrix generated matrices */
>>   	uint8_t		hash_key[0];
>>   };
>>
>> @@ -253,12 +255,25 @@ rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
>>   			ctx->hash_key[i] = rte_rand();
>>   	}
>>
>> +	if (rte_thash_gfni_supported) {
> 
> I think it should be:
> if (rte_thash_gfni_supported && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
> 
> 

Agree

>> +		ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
>> +			RTE_CACHE_LINE_SIZE);
> 
> You can do it probably before allocation ctx, at the same place where te is allocated.
> Might be a bit nicer.
> 

I'd prefer to keep the allocation and initialization of the matrices in one
place: rte_thash_complete_matrix() below uses the previously
generated ctx->hash_key.

>> +		if (ctx->matrices == NULL)
> 
> 		RTE_LOG(ERR, ...);
> 		rte_errno = ENOMEM;
> 

Agree

>> +			goto free_ctx;
>> +
>> +		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
>> +			key_len);
>> +	}
>> +
>>   	te->data = (void *)ctx;
>>   	TAILQ_INSERT_TAIL(thash_list, te, next);
>>
>>   	rte_mcfg_tailq_write_unlock();
>>
>>   	return ctx;
>> +
>> +free_ctx:
>> +	rte_free(ctx);
>>   free_te:
>>   	rte_free(te);
>>   exit:
>> @@ -372,6 +387,10 @@ generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
>>   			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
>>   	}
>>
>> +	if (rte_thash_gfni_supported)
> 
> Here and in data-path functions, I think it would be better:
> if (ctx->matrices != NULL)

Agree

>> +		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
>> +			ctx->key_len);
>> +
>>   	return 0;
>>   }
>>
>> @@ -628,6 +647,16 @@ rte_thash_get_key(struct rte_thash_ctx *ctx)
>>   	return ctx->hash_key;
>>   }
>>
>> +const uint64_t *
>> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
>> +{
>> +	if (rte_thash_gfni_supported)
>> +		return ctx->matrices;
> 
> Why not just always:
> return ctx->matrices;
> ?
> 

Agree

>> +
>> +	rte_errno = ENOTSUP;
>> +	return NULL;
>> +}
>> +
>>   static inline uint8_t
>>   read_unaligned_byte(uint8_t *ptr, unsigned int len, unsigned int offset)
>>   {
>> @@ -739,11 +768,17 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
>>   	attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));
>>
>>   	for (i = 0; i < attempts; i++) {
>> -		for (j = 0; j < (tuple_len / 4); j++)
>> -			tmp_tuple[j] =
>> -				rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
>> +		if (rte_thash_gfni_supported)
> if (ctx->matrices)
> 
>> +			hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
>> +		else {
>> +			for (j = 0; j < (tuple_len / 4); j++)
>> +				tmp_tuple[j] =
>> +					rte_be_to_cpu_32(
>> +						*(uint32_t *)&tuple[j * 4]);
>> +
>> +			hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
>> +		}
>>
>> -		hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
>>   		adj_bits = rte_thash_get_complement(h, hash, desired_value);
>>
>>   		/*
>> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
>> index e3f1fc6..6e6861c 100644
>> --- a/lib/hash/rte_thash.h
>> +++ b/lib/hash/rte_thash.h
>> @@ -410,6 +410,25 @@ const uint8_t *
>>   rte_thash_get_key(struct rte_thash_ctx *ctx);
>>
>>   /**
>> + * Get a pointer to the toeplitz hash matrices contained in the context.
>> + * These matrices could be used with fast toeplitz hash implementation if
>> + * CPU supports GFNI.
>> + * Matrices changes after each addition of a helper.
>> + *
>> + * @warning
>> + * @b EXPERIMENTAL: this API may change without prior notice.
>> + *
>> + * @param ctx
>> + *  Thash context
>> + * @return
>> + *  A pointer to the toeplitz hash key matrices on success
>> + *  NULL if GFNI is not supported.
>> + */
>> +__rte_experimental
>> +const uint64_t *
>> +rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx);
>> +
>> +/**
>>    * Function prototype for the rte_thash_adjust_tuple
>>    * to check if adjusted tuple could be used.
>>    * Generally it is some kind of lookup function to check
>> diff --git a/lib/hash/version.map b/lib/hash/version.map
>> index cecf922..3eda695 100644
>> --- a/lib/hash/version.map
>> +++ b/lib/hash/version.map
>> @@ -43,6 +43,7 @@ EXPERIMENTAL {
>>   	rte_thash_find_existing;
>>   	rte_thash_free_ctx;
>>   	rte_thash_get_complement;
>> +	rte_thash_get_gfni_matrices;
>>   	rte_thash_get_helper;
>>   	rte_thash_get_key;
>>   	rte_thash_gfni_supported;
>> --
>> 2.7.4
>
  

Patch

diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
index 07447f7..86a0e96 100644
--- a/lib/hash/rte_thash.c
+++ b/lib/hash/rte_thash.c
@@ -86,6 +86,8 @@  struct rte_thash_ctx {
 	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
 	uint32_t	subtuples_nb;	/** < number of subtuples */
 	uint32_t	flags;
+	uint64_t	*matrices;
+	/**< rte_thash_complete_matrix generated matrices */
 	uint8_t		hash_key[0];
 };
 
@@ -253,12 +255,25 @@  rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
 			ctx->hash_key[i] = rte_rand();
 	}
 
+	if (rte_thash_gfni_supported) {
+		ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
+			RTE_CACHE_LINE_SIZE);
+		if (ctx->matrices == NULL)
+			goto free_ctx;
+
+		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
+			key_len);
+	}
+
 	te->data = (void *)ctx;
 	TAILQ_INSERT_TAIL(thash_list, te, next);
 
 	rte_mcfg_tailq_write_unlock();
 
 	return ctx;
+
+free_ctx:
+	rte_free(ctx);
 free_te:
 	rte_free(te);
 exit:
@@ -372,6 +387,10 @@  generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
 			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
 	}
 
+	if (rte_thash_gfni_supported)
+		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
+			ctx->key_len);
+
 	return 0;
 }
 
@@ -628,6 +647,16 @@  rte_thash_get_key(struct rte_thash_ctx *ctx)
 	return ctx->hash_key;
 }
 
+const uint64_t *
+rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
+{
+	if (rte_thash_gfni_supported)
+		return ctx->matrices;
+
+	rte_errno = ENOTSUP;
+	return NULL;
+}
+
 static inline uint8_t
 read_unaligned_byte(uint8_t *ptr, unsigned int len, unsigned int offset)
 {
@@ -739,11 +768,17 @@  rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
 	attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));
 
 	for (i = 0; i < attempts; i++) {
-		for (j = 0; j < (tuple_len / 4); j++)
-			tmp_tuple[j] =
-				rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
+		if (rte_thash_gfni_supported)
+			hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
+		else {
+			for (j = 0; j < (tuple_len / 4); j++)
+				tmp_tuple[j] =
+					rte_be_to_cpu_32(
+						*(uint32_t *)&tuple[j * 4]);
+
+			hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
+		}
 
-		hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
 		adj_bits = rte_thash_get_complement(h, hash, desired_value);
 
 		/*
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index e3f1fc6..6e6861c 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -410,6 +410,25 @@  const uint8_t *
 rte_thash_get_key(struct rte_thash_ctx *ctx);
 
 /**
+ * Get a pointer to the toeplitz hash matrices contained in the context.
+ * These matrices could be used with fast toeplitz hash implementation if
+ * CPU supports GFNI.
+ * Matrices changes after each addition of a helper.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @param ctx
+ *  Thash context
+ * @return
+ *  A pointer to the toeplitz hash key matrices on success
+ *  NULL if GFNI is not supported.
+ */
+__rte_experimental
+const uint64_t *
+rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx);
+
+/**
  * Function prototype for the rte_thash_adjust_tuple
  * to check if adjusted tuple could be used.
  * Generally it is some kind of lookup function to check
diff --git a/lib/hash/version.map b/lib/hash/version.map
index cecf922..3eda695 100644
--- a/lib/hash/version.map
+++ b/lib/hash/version.map
@@ -43,6 +43,7 @@  EXPERIMENTAL {
 	rte_thash_find_existing;
 	rte_thash_free_ctx;
 	rte_thash_get_complement;
+	rte_thash_get_gfni_matrices;
 	rte_thash_get_helper;
 	rte_thash_get_key;
 	rte_thash_gfni_supported;