[1/2] lib/hash: initialize __m128i data type in a portable way

Message ID 1732748278-14796-1-git-send-email-andremue@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [1/2] lib/hash: initialize __m128i data type in a portable way |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Andre Muezerie Nov. 27, 2024, 10:57 p.m. UTC
The mechanism used to initialize an __m128i data type in rte_thash.h is
non-portable and MSVC does not like it. It clearly is not doing what
is desired:

..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
    truncation from 'unsigned __int64' to 'char'
..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
    truncation from 'unsigned __int64' to 'char'

A more portable approach is to use compiler intrinsics to perform the
initialization. This patch uses a single compiler intrinsic to
initialize the data type using a sequence of 16 bytes stored in
memory.

There should be no perf degradation due to this change.

Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
---
 lib/hash/rte_thash.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
  

Comments

Andre Muezerie March 3, 2025, 10:27 p.m. UTC | #1
On Wed, Nov 27, 2024 at 02:57:57PM -0800, Andre Muezerie wrote:
> The mechanism used to initialize an __m128i data type in rte_thash.h is
> non-portable and MSVC does not like it. It clearly is not doing what
> is desired:
> 
> ..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
>     truncation from 'unsigned __int64' to 'char'
> ..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
>     truncation from 'unsigned __int64' to 'char'
> 
> A more portable approach is to use compiler intrinsics to perform the
> initialization. This patch uses a single compiler intrinsic to
> initialize the data type using a sequence of 16 bytes stored in
> memory.
> 
> There should be no perf degradation due to this change.
> 
> Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
> ---
>  lib/hash/rte_thash.h | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> index c0af5968df..3512639792 100644
> --- a/lib/hash/rte_thash.h
> +++ b/lib/hash/rte_thash.h
> @@ -34,8 +34,9 @@ extern "C" {
>  /* Byte swap mask used for converting IPv6 address
>   * 4-byte chunks to CPU byte order
>   */
> -static const __m128i rte_thash_ipv6_bswap_mask = {
> -		0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};
> +static const uint8_t rte_thash_ipv6_bswap_mask[] = {
> +		0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
> +		0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C};
>  #endif
>  
>  /**
> @@ -152,12 +153,14 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig,
>  			union rte_thash_tuple *targ)
>  {
>  #ifdef RTE_ARCH_X86
> +	const __m128i ipv6_bswap_mask =
> +			_mm_loadu_si128((const __m128i*)&rte_thash_ipv6_bswap_mask);
>  	__m128i ipv6 = _mm_loadu_si128((const __m128i *)&orig->src_addr);
>  	*(__m128i *)&targ->v6.src_addr =
> -			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
> +			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
>  	ipv6 = _mm_loadu_si128((const __m128i *)&orig->dst_addr);
>  	*(__m128i *)&targ->v6.dst_addr =
> -			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
> +			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
>  #elif defined(__ARM_NEON)
>  	uint8x16_t ipv6 = vld1q_u8(orig->src_addr.a);
>  	vst1q_u8(targ->v6.src_addr.a, vrev32q_u8(ipv6));
> -- 
> 2.34.1

Could someone please review this patch and let me know if there are changes to be made?
I have other patches depending on this.

Thanks,
Andre Muezerie
  
Bruce Richardson March 4, 2025, 10:46 a.m. UTC | #2
On Wed, Nov 27, 2024 at 02:57:57PM -0800, Andre Muezerie wrote:
> The mechanism used to initialize an __m128i data type in rte_thash.h is
> non-portable and MSVC does not like it. It clearly is not doing what
> is desired:
> 
> ..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
>     truncation from 'unsigned __int64' to 'char'
> ..\lib\hash\rte_thash.h(38): warning C4305: 'initializing':
>     truncation from 'unsigned __int64' to 'char'
> 
> A more portable approach is to use compiler intrinsics to perform the
> initialization. This patch uses a single compiler intrinsic to
> initialize the data type using a sequence of 16 bytes stored in
> memory.
> 
> There should be no perf degradation due to this change.
> 
> Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
> ---
>  lib/hash/rte_thash.h | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
> index c0af5968df..3512639792 100644
> --- a/lib/hash/rte_thash.h
> +++ b/lib/hash/rte_thash.h
> @@ -34,8 +34,9 @@ extern "C" {
>  /* Byte swap mask used for converting IPv6 address
>   * 4-byte chunks to CPU byte order
>   */
> -static const __m128i rte_thash_ipv6_bswap_mask = {
> -		0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};

Does work just changing this to using _mm_set_epi64x(), or _mm_set_epi32()? 
If necessary we can just move this constant into the function using it, to
set use those functions instead.

> +static const uint8_t rte_thash_ipv6_bswap_mask[] = {
> +		0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
> +		0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C};
>  #endif
>  
>  /**
> @@ -152,12 +153,14 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig,
>  			union rte_thash_tuple *targ)
>  {
>  #ifdef RTE_ARCH_X86
> +	const __m128i ipv6_bswap_mask =
> +			_mm_loadu_si128((const __m128i*)&rte_thash_ipv6_bswap_mask);
>  	__m128i ipv6 = _mm_loadu_si128((const __m128i *)&orig->src_addr);
>  	*(__m128i *)&targ->v6.src_addr =
> -			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
> +			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
>  	ipv6 = _mm_loadu_si128((const __m128i *)&orig->dst_addr);
>  	*(__m128i *)&targ->v6.dst_addr =
> -			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
> +			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
>  #elif defined(__ARM_NEON)
>  	uint8x16_t ipv6 = vld1q_u8(orig->src_addr.a);
>  	vst1q_u8(targ->v6.src_addr.a, vrev32q_u8(ipv6));
> -- 
> 2.34.1
>
  

Patch

diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index c0af5968df..3512639792 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -34,8 +34,9 @@  extern "C" {
 /* Byte swap mask used for converting IPv6 address
  * 4-byte chunks to CPU byte order
  */
-static const __m128i rte_thash_ipv6_bswap_mask = {
-		0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};
+static const uint8_t rte_thash_ipv6_bswap_mask[] = {
+		0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
+		0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C};
 #endif
 
 /**
@@ -152,12 +153,14 @@  rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig,
 			union rte_thash_tuple *targ)
 {
 #ifdef RTE_ARCH_X86
+	const __m128i ipv6_bswap_mask =
+			_mm_loadu_si128((const __m128i*)&rte_thash_ipv6_bswap_mask);
 	__m128i ipv6 = _mm_loadu_si128((const __m128i *)&orig->src_addr);
 	*(__m128i *)&targ->v6.src_addr =
-			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
 	ipv6 = _mm_loadu_si128((const __m128i *)&orig->dst_addr);
 	*(__m128i *)&targ->v6.dst_addr =
-			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+			_mm_shuffle_epi8(ipv6, ipv6_bswap_mask);
 #elif defined(__ARM_NEON)
 	uint8x16_t ipv6 = vld1q_u8(orig->src_addr.a);
 	vst1q_u8(targ->v6.src_addr.a, vrev32q_u8(ipv6));