[v2,2/2] net: stop using mmx intrinsics
Checks
Commit Message
Update code to use only avx/sse intrinsics as mmx is not supported on
MSVC.
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
lib/net/net_crc_avx512.c | 27 +++++++--------------------
lib/net/net_crc_sse.c | 27 +++++++--------------------
2 files changed, 14 insertions(+), 40 deletions(-)
Comments
On Thu, Mar 28, 2024 at 09:14:06AM -0700, Tyler Retzlaff wrote:
> Update code to use only avx/sse intrinsics as mmx is not supported on
> MSVC.
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
One comment inline below. With or without that suggestion:
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> lib/net/net_crc_avx512.c | 27 +++++++--------------------
> lib/net/net_crc_sse.c | 27 +++++++--------------------
> 2 files changed, 14 insertions(+), 40 deletions(-)
>
> diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
> index 0f0dee4..d18eb96 100644
> --- a/lib/net/net_crc_avx512.c
> +++ b/lib/net/net_crc_avx512.c
> @@ -5,11 +5,10 @@
> #include <stdalign.h>
>
> #include <rte_common.h>
> +#include <rte_vect.h>
>
> #include "net_crc.h"
>
> -#include <x86intrin.h>
> -
> /* VPCLMULQDQ CRC computation context structure */
> struct crc_vpclmulqdq_ctx {
> __m512i rk1_rk2;
> @@ -331,13 +330,10 @@ static const alignas(16) uint32_t mask2[4] = {
> c9, c10, c11);
> crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
> c16, c17, 0, 0);
Since the _mm_setr_epi64() calls below are being replaced, it would be nice
to change the _mm512_setr_epi64() calls above too. Long term, I think it's
going to be confusing having some assignments set up as L->R, while others
are R->L.
> - crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
> - _mm_cvtsi64_m64(c17));
> + crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
>
> - crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
> - _mm_cvtsi64_m64(c19));
> - crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
> - _mm_cvtsi64_m64(c21));
> + crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
> + crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
> }
<snip>
@@ -5,11 +5,10 @@
#include <stdalign.h>
#include <rte_common.h>
+#include <rte_vect.h>
#include "net_crc.h"
-#include <x86intrin.h>
-
/* VPCLMULQDQ CRC computation context structure */
struct crc_vpclmulqdq_ctx {
__m512i rk1_rk2;
@@ -331,13 +330,10 @@ static const alignas(16) uint32_t mask2[4] = {
c9, c10, c11);
crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
c16, c17, 0, 0);
- crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
- _mm_cvtsi64_m64(c17));
+ crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
- crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
- _mm_cvtsi64_m64(c19));
- crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
- _mm_cvtsi64_m64(c21));
+ crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
+ crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
}
static void
@@ -378,13 +374,10 @@ static const alignas(16) uint32_t mask2[4] = {
c9, c10, c11);
crc16_ccitt.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
c16, c17, 0, 0);
- crc16_ccitt.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
- _mm_cvtsi64_m64(c17));
+ crc16_ccitt.fold_1x128b = _mm_set_epi64x(c17, c16);
- crc16_ccitt.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
- _mm_cvtsi64_m64(c19));
- crc16_ccitt.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
- _mm_cvtsi64_m64(c21));
+ crc16_ccitt.rk5_rk6 = _mm_set_epi64x(c19, c18);
+ crc16_ccitt.rk7_rk8 = _mm_set_epi64x(c21, c20);
}
void
@@ -392,12 +385,6 @@ static const alignas(16) uint32_t mask2[4] = {
{
crc32_load_init_constants();
crc16_load_init_constants();
-
- /*
- * Reset the register as following calculation may
- * use other data types such as float, double, etc.
- */
- _mm_empty();
}
uint32_t
@@ -6,12 +6,11 @@
#include <string.h>
#include <rte_common.h>
+#include <rte_vect.h>
#include <rte_branch_prediction.h>
#include "net_crc.h"
-#include <x86intrin.h>
-
/** PCLMULQDQ CRC computation context structure */
struct crc_pclmulqdq_ctx {
__m128i rk1_rk2;
@@ -272,12 +271,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
p = 0x10811LLU;
/** Save the params in context structure */
- crc16_ccitt_pclmulqdq.rk1_rk2 =
- _mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
- crc16_ccitt_pclmulqdq.rk5_rk6 =
- _mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
- crc16_ccitt_pclmulqdq.rk7_rk8 =
- _mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+ crc16_ccitt_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+ crc16_ccitt_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+ crc16_ccitt_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
/** Initialize CRC32 data */
k1 = 0xccaa009eLLU;
@@ -288,18 +284,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
p = 0x1db710641LLU;
/** Save the params in context structure */
- crc32_eth_pclmulqdq.rk1_rk2 =
- _mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
- crc32_eth_pclmulqdq.rk5_rk6 =
- _mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
- crc32_eth_pclmulqdq.rk7_rk8 =
- _mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
-
- /**
- * Reset the register as following calculation may
- * use other data types such as float, double, etc.
- */
- _mm_empty();
+ crc32_eth_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+ crc32_eth_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+ crc32_eth_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
}
uint32_t