[v2,2/2] net: stop using mmx intrinsics

Message ID 1711642446-6880-3-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Accepted
Delegated to: Thomas Monjalon
Headers
Series stop using mmx intrinsics |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Tyler Retzlaff March 28, 2024, 4:14 p.m. UTC
Update code to use only AVX/SSE intrinsics, as MMX intrinsics are not
supported on MSVC.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/net/net_crc_avx512.c | 27 +++++++--------------------
 lib/net/net_crc_sse.c    | 27 +++++++--------------------
 2 files changed, 14 insertions(+), 40 deletions(-)
  

Comments

Bruce Richardson March 28, 2024, 5:21 p.m. UTC | #1
On Thu, Mar 28, 2024 at 09:14:06AM -0700, Tyler Retzlaff wrote:
> Update code to use only avx/sse intrinsics as mmx is not supported on
> MSVC.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---

One comment inline below. With or without that suggestion:

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

>  lib/net/net_crc_avx512.c | 27 +++++++--------------------
>  lib/net/net_crc_sse.c    | 27 +++++++--------------------
>  2 files changed, 14 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
> index 0f0dee4..d18eb96 100644
> --- a/lib/net/net_crc_avx512.c
> +++ b/lib/net/net_crc_avx512.c
> @@ -5,11 +5,10 @@
>  #include <stdalign.h>
>  
>  #include <rte_common.h>
> +#include <rte_vect.h>
>  
>  #include "net_crc.h"
>  
> -#include <x86intrin.h>
> -
>  /* VPCLMULQDQ CRC computation context structure */
>  struct crc_vpclmulqdq_ctx {
>  	__m512i rk1_rk2;
> @@ -331,13 +330,10 @@ static const alignas(16) uint32_t mask2[4] = {
>  			c9, c10, c11);
>  	crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
>  			c16, c17, 0, 0);

Since the _mm_setr_epi64() calls below are being replaced, it would be nice
to change the _mm512_setr_epi64() calls above too. Long term, I think it's
going to be confusing to have some assignments list their arguments L->R
while others list them R->L.


> -	crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
> -			_mm_cvtsi64_m64(c17));
> +	crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
>  
> -	crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
> -			_mm_cvtsi64_m64(c19));
> -	crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
> -			_mm_cvtsi64_m64(c21));
> +	crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
> +	crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
>  }

<snip>
  

Patch

diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
index 0f0dee4..d18eb96 100644
--- a/lib/net/net_crc_avx512.c
+++ b/lib/net/net_crc_avx512.c
@@ -5,11 +5,10 @@ 
 #include <stdalign.h>
 
 #include <rte_common.h>
+#include <rte_vect.h>
 
 #include "net_crc.h"
 
-#include <x86intrin.h>
-
 /* VPCLMULQDQ CRC computation context structure */
 struct crc_vpclmulqdq_ctx {
 	__m512i rk1_rk2;
@@ -331,13 +330,10 @@  static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 static void
@@ -378,13 +374,10 @@  static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc16_ccitt.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc16_ccitt.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc16_ccitt.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc16_ccitt.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc16_ccitt.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc16_ccitt.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc16_ccitt.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 void
@@ -392,12 +385,6 @@  static const alignas(16) uint32_t mask2[4] = {
 {
 	crc32_load_init_constants();
 	crc16_load_init_constants();
-
-	/*
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
 }
 
 uint32_t
diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c
index d673ae3..112dc94 100644
--- a/lib/net/net_crc_sse.c
+++ b/lib/net/net_crc_sse.c
@@ -6,12 +6,11 @@ 
 #include <string.h>
 
 #include <rte_common.h>
+#include <rte_vect.h>
 #include <rte_branch_prediction.h>
 
 #include "net_crc.h"
 
-#include <x86intrin.h>
-
 /** PCLMULQDQ CRC computation context structure */
 struct crc_pclmulqdq_ctx {
 	__m128i rk1_rk2;
@@ -272,12 +271,9 @@  static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x10811LLU;
 
 	/** Save the params in context structure */
-	crc16_ccitt_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc16_ccitt_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc16_ccitt_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+	crc16_ccitt_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc16_ccitt_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc16_ccitt_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 
 	/** Initialize CRC32 data */
 	k1 = 0xccaa009eLLU;
@@ -288,18 +284,9 @@  static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x1db710641LLU;
 
 	/** Save the params in context structure */
-	crc32_eth_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc32_eth_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc32_eth_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
-
-	/**
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
+	crc32_eth_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc32_eth_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc32_eth_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 }
 
 uint32_t