[1/4] net/i40e: fix AVX-512 pointer copy on 32-bit

Message ID 20240906141127.628873-2-bruce.richardson@intel.com (mailing list archive)
State Accepted
Delegated to: Bruce Richardson
Headers
Series fix issues with using AVX-512 drivers on 32-bit

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Bruce Richardson Sept. 6, 2024, 2:11 p.m. UTC
The size of a pointer on 32-bit is only 4 bytes rather than 8, so
copying 32 pointers requires only half the number of AVX-512 load/store
operations.

Fixes: 5171b4ee6b6b ("net/i40e: optimize Tx by using AVX512")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)
  

Comments

Stokes, Ian Sept. 30, 2024, 1:27 p.m. UTC | #1
> The size of a pointer on 32-bit is only 4 rather than 8 bytes, so
> copying 32 pointers only requires half the number of AVX-512 load store
> operations.
> 
> Fixes: 5171b4ee6b6b ("net/i40e: optimize Tx by using AVX512")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx_vec_avx512.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> index 0238b03f8a..3b2750221b 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> @@ -799,6 +799,7 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
>  		uint32_t copied = 0;
>  		/* n is multiple of 32 */
>  		while (copied < n) {
> +#ifdef RTE_ARCH_64
>  			const __m512i a = _mm512_load_si512(&txep[copied]);
>  			const __m512i b = _mm512_load_si512(&txep[copied + 8]);
>  			const __m512i c = _mm512_load_si512(&txep[copied + 16]);
> @@ -808,6 +809,12 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
>  			_mm512_storeu_si512(&cache_objs[copied + 8], b);
>  			_mm512_storeu_si512(&cache_objs[copied + 16], c);
>  			_mm512_storeu_si512(&cache_objs[copied + 24], d);
> +#else
> +			const __m512i a = _mm512_load_si512(&txep[copied]);
> +			const __m512i b = _mm512_load_si512(&txep[copied + 16]);
> +			_mm512_storeu_si512(&cache_objs[copied], a);
> +			_mm512_storeu_si512(&cache_objs[copied + 16], b);
> +#endif
>  			copied += 32;
>  		}
>  		cache->len += n;
> --
> 2.43.0

Looks good to me, ACKED.

Thanks
Ian
  

Patch

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 0238b03f8a..3b2750221b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -799,6 +799,7 @@  i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_load_si512(&txep[copied]);
 			const __m512i b = _mm512_load_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_load_si512(&txep[copied + 16]);
@@ -808,6 +809,12 @@  i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_load_si512(&txep[copied]);
+			const __m512i b = _mm512_load_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;