@@ -472,7 +472,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -484,9 +484,9 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]);
#endif
+ /* A.1 load desc[2-0] */
descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -281,22 +281,22 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* in one XMM reg.
*/
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp1 = *(vector unsigned long *)&sw_ring[pos];
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = *(vector unsigned long *)(rxdp + 3);
rte_compiler_barrier();
/* B.2 copy 2 mbuf point into rx_pkts */
*(vector unsigned long *)&rx_pkts[pos] = mbp1;
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp2 = *(vector unsigned long *)&sw_ring[pos + 2];
+ /* A.1 load desc[2-0] */
descs[2] = *(vector unsigned long *)(rxdp + 2);
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = *(vector unsigned long *)(rxdp + 1);
rte_compiler_barrier();
descs[0] = *(vector unsigned long *)(rxdp);
@@ -280,20 +280,20 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq,
int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT};
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
/* B.2 copy 2 mbuf point into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
+ /* A.1 load desc[2-0] */
descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
- /* B.1 load 2 mbuf point */
descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
descs[0] = vld1q_u64((uint64_t *)(rxdp));
@@ -462,7 +462,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -474,9 +474,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
#endif
+ /* A.1 load desc[2-0] */
descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -494,7 +494,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -506,9 +506,9 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
#endif
+ /* A.1 load desc[2-0] */
descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -755,7 +755,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -767,9 +767,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
#endif
+ /* A.1 load desc[2-0] */
descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -416,7 +416,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -428,9 +428,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
#endif
+ /* A.1 load desc[2-0] */
descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
@@ -454,7 +454,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
/* Read desc statuses backwards to avoid race condition */
- /* A.1 load 4 pkts desc */
+ /* A.1 load desc[3] */
descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
rte_compiler_barrier();
@@ -466,9 +466,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
#endif
+ /* A.1 load desc[2-0] */
descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
rte_compiler_barrier();
- /* B.1 load 2 mbuf point */
descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
rte_compiler_barrier();
descs[0] = _mm_loadu_si128((__m128i *)(rxdp));