net/bnxt: use shorter SIMD initializers
diff mbox series

Message ID 20201022185051.183164-1-lance.richardson@broadcom.com
State Accepted
Delegated to: Ajit Khaparde
Headers show
Series
  • net/bnxt: use shorter SIMD initializers
Related show

Checks

Context Check Description
ci/iol-mellanox-Performance success Performance Testing PASS
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK
ci/iol-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Lance Richardson Oct. 22, 2020, 6:50 p.m. UTC
Make SIMD initialization code less verbose by using the broadcast
intrinsics (vdupq_n_u32() for NEON, _mm_set1_epi32() for SSE) when
all lanes of a vector are initialized to the same value.

Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++--------------------
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c  | 37 +++++------------
 2 files changed, 23 insertions(+), 72 deletions(-)

Comments

Ajit Khaparde Oct. 26, 2020, 4:10 a.m. UTC | #1
On Thu, Oct 22, 2020 at 11:51 AM Lance Richardson
<lance.richardson@broadcom.com> wrote:
>
> Make SIMD initialization code less verbose by using appropriate
> intrinsics when all lanes of a vector are initialized to the
> same value.
>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Patch applied to dpdk-next-net-brcm.

> ---
>  drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++--------------------
>  drivers/net/bnxt/bnxt_rxtx_vec_sse.c  | 37 +++++------------
>  2 files changed, 23 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index f49e29ccb..de1d96570 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4],
>                 0xFF, 0xFF,                /* vlan_tci (zeroes) */
>                 12, 13, 14, 15             /* rss hash */
>         };
> -       const uint32x4_t flags_type_mask = {
> -               RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -               RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -               RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -               RX_PKT_CMPL_FLAGS_ITYPE_MASK
> -       };
> -       const uint32x4_t flags2_mask1 = {
> -               RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -               RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -               RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -               RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
> -       };
> -       const uint32x4_t flags2_mask2 = {
> -               RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -               RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -               RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -               RX_PKT_CMPL_FLAGS2_IP_TYPE
> -       };
> -       const uint32x4_t rss_mask = {
> -               RX_PKT_CMPL_FLAGS_RSS_VALID,
> -               RX_PKT_CMPL_FLAGS_RSS_VALID,
> -               RX_PKT_CMPL_FLAGS_RSS_VALID,
> -               RX_PKT_CMPL_FLAGS_RSS_VALID
> -       };
> -       const uint32x4_t flags2_index_mask = {
> -               0x1F, 0x1F, 0x1F, 0x1F
> -       };
> -       const uint32x4_t flags2_error_mask = {
> -               0xF, 0xF, 0xF, 0xF
> -       };
> +       const uint32x4_t flags_type_mask =
> +               vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> +       const uint32x4_t flags2_mask1 =
> +               vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> +                           RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> +       const uint32x4_t flags2_mask2 =
> +               vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> +       const uint32x4_t rss_mask =
> +               vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> +       const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
> +       const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
>         uint32x4_t flags_type, flags2, index, errors, rss_flags;
>         uint32x4_t tmp, ptype_idx;
>         uint64x2_t t0, t1;
> @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>         uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
>         struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
>         uint64_t valid, desc_valid_mask = ~0UL;
> -       const uint32x4_t info3_v_mask = {
> -               CMPL_BASE_V, CMPL_BASE_V,
> -               CMPL_BASE_V, CMPL_BASE_V
> -       };
> +       const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
>         uint32_t raw_cons = cpr->cp_raw_cons;
>         uint32_t cons, mbcons;
>         int nb_rx_pkts = 0;
>         const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
> -       const uint32x4_t valid_target = {
> -               !!(raw_cons & cp_ring_size),
> -               !!(raw_cons & cp_ring_size),
> -               !!(raw_cons & cp_ring_size),
> -               !!(raw_cons & cp_ring_size)
> -       };
> +       const uint32x4_t valid_target =
> +               vdupq_n_u32(!!(raw_cons & cp_ring_size));
>         int i;
>
>         /* If Rx Q was stopped return */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index e4ba63551..e12bf8bb7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
>                              0xFF, 0xFF, 3, 2,        /* pkt_len */
>                              0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
>         const __m128i flags_type_mask =
> -               _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -                             RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -                             RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -                             RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> +               _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
>         const __m128i flags2_mask1 =
> -               _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                               RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -                             RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                               RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -                             RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                               RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -                             RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -                               RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> +               _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> +                              RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
>         const __m128i flags2_mask2 =
> -               _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -                             RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -                             RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -                             RX_PKT_CMPL_FLAGS2_IP_TYPE);
> +               _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
>         const __m128i rss_mask =
> -               _mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
> -                             RX_PKT_CMPL_FLAGS_RSS_VALID,
> -                             RX_PKT_CMPL_FLAGS_RSS_VALID,
> -                             RX_PKT_CMPL_FLAGS_RSS_VALID);
> +               _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
>         __m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
>         __m128i ptype_idx;
>         uint32_t ol_flags;
> @@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
>         t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
>
>         /* Compute ol_flags and checksum error indexes for four packets. */
> -       flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
> +       flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
>
>         errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
> -       errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
> +       errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
>         errors = _mm_and_si128(errors, flags2);
>
>         index = _mm_andnot_si128(errors, flags2);
> @@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>         uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
>         struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
>         uint64_t valid, desc_valid_mask = ~0ULL;
> -       const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
> -                                                  CMPL_BASE_V, CMPL_BASE_V);
> +       const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
>         uint32_t raw_cons = cpr->cp_raw_cons;
>         uint32_t cons, mbcons;
>         int nb_rx_pkts = 0;
>         const __m128i valid_target =
> -               _mm_set_epi32(!!(raw_cons & cp_ring_size),
> -                             !!(raw_cons & cp_ring_size),
> -                             !!(raw_cons & cp_ring_size),
> -                             !!(raw_cons & cp_ring_size));
> +               _mm_set1_epi32(!!(raw_cons & cp_ring_size));
>         int i;
>
>         /* If Rx Q was stopped return */
> --
> 2.25.1
>

Patch
diff mbox series

diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index f49e29ccb..de1d96570 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -67,40 +67,17 @@  descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4],
 		0xFF, 0xFF,                /* vlan_tci (zeroes) */
 		12, 13, 14, 15             /* rss hash */
 	};
-	const uint32x4_t flags_type_mask = {
-		RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-		RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-		RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-		RX_PKT_CMPL_FLAGS_ITYPE_MASK
-	};
-	const uint32x4_t flags2_mask1 = {
-		RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-		RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-		RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-		RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-			RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
-	};
-	const uint32x4_t flags2_mask2 = {
-		RX_PKT_CMPL_FLAGS2_IP_TYPE,
-		RX_PKT_CMPL_FLAGS2_IP_TYPE,
-		RX_PKT_CMPL_FLAGS2_IP_TYPE,
-		RX_PKT_CMPL_FLAGS2_IP_TYPE
-	};
-	const uint32x4_t rss_mask = {
-		RX_PKT_CMPL_FLAGS_RSS_VALID,
-		RX_PKT_CMPL_FLAGS_RSS_VALID,
-		RX_PKT_CMPL_FLAGS_RSS_VALID,
-		RX_PKT_CMPL_FLAGS_RSS_VALID
-	};
-	const uint32x4_t flags2_index_mask = {
-		0x1F, 0x1F, 0x1F, 0x1F
-	};
-	const uint32x4_t flags2_error_mask = {
-		0xF, 0xF, 0xF, 0xF
-	};
+	const uint32x4_t flags_type_mask =
+		vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+	const uint32x4_t flags2_mask1 =
+		vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			    RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+	const uint32x4_t flags2_mask2 =
+		vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
+	const uint32x4_t rss_mask =
+		vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
+	const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
+	const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
 	uint32x4_t flags_type, flags2, index, errors, rss_flags;
 	uint32x4_t tmp, ptype_idx;
 	uint64x2_t t0, t1;
@@ -180,20 +157,13 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
 	struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
 	uint64_t valid, desc_valid_mask = ~0UL;
-	const uint32x4_t info3_v_mask = {
-		CMPL_BASE_V, CMPL_BASE_V,
-		CMPL_BASE_V, CMPL_BASE_V
-	};
+	const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
 	uint32_t raw_cons = cpr->cp_raw_cons;
 	uint32_t cons, mbcons;
 	int nb_rx_pkts = 0;
 	const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
-	const uint32x4_t valid_target = {
-		!!(raw_cons & cp_ring_size),
-		!!(raw_cons & cp_ring_size),
-		!!(raw_cons & cp_ring_size),
-		!!(raw_cons & cp_ring_size)
-	};
+	const uint32x4_t valid_target =
+		vdupq_n_u32(!!(raw_cons & cp_ring_size));
 	int i;
 
 	/* If Rx Q was stopped return */
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index e4ba63551..e12bf8bb7 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -63,29 +63,14 @@  descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 			     0xFF, 0xFF, 3, 2,        /* pkt_len */
 			     0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
 	const __m128i flags_type_mask =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK,
-			      RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
 	const __m128i flags2_mask1 =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-				RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-				RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-				RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
-			      RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
-				RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+			       RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
 	const __m128i flags2_mask2 =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE,
-			      RX_PKT_CMPL_FLAGS2_IP_TYPE);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
 	const __m128i rss_mask =
-		_mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID,
-			      RX_PKT_CMPL_FLAGS_RSS_VALID);
+		_mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
 	__m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
 	__m128i ptype_idx;
 	uint32_t ol_flags;
@@ -114,10 +99,10 @@  descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
 	t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
 
 	/* Compute ol_flags and checksum error indexes for four packets. */
-	flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
+	flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
 
 	errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
-	errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
+	errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
 	errors = _mm_and_si128(errors, flags2);
 
 	index = _mm_andnot_si128(errors, flags2);
@@ -165,16 +150,12 @@  bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
 	struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
 	uint64_t valid, desc_valid_mask = ~0ULL;
-	const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
-						   CMPL_BASE_V, CMPL_BASE_V);
+	const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
 	uint32_t raw_cons = cpr->cp_raw_cons;
 	uint32_t cons, mbcons;
 	int nb_rx_pkts = 0;
 	const __m128i valid_target =
-		_mm_set_epi32(!!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size),
-			      !!(raw_cons & cp_ring_size));
+		_mm_set1_epi32(!!(raw_cons & cp_ring_size));
 	int i;
 
 	/* If Rx Q was stopped return */