[v6,09/23] net/i40e: use mbuf descriptor accessors

Message ID 1709012499-12813-10-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: stop and remove RTE_MARKER typedefs

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Tyler Retzlaff Feb. 27, 2024, 5:41 a.m. UTC
RTE_MARKER typedefs are a GCC extension that MSVC does not support. Use
the new rte_mbuf_rearm_data() and rte_mbuf_rx_descriptor_fields1()
accessors, which return a suitably typed pointer without going through
the marker fields.
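
Not part of the patch itself: a minimal sketch of the replacement pattern,
assuming rte_mbuf_rearm_data() (presumably introduced earlier in this
series) returns a uint64_t * covering the 8-byte rearm region, as its use
in the hunks below implies.

	#include <stdint.h>
	#include <rte_mbuf.h>

	/* Old pattern removed by this patch: write through the RTE_MARKER
	 * rearm_data field via a uintptr_t cast.
	 */
	static inline void
	rearm_via_marker(struct rte_mbuf *mb, uint64_t initializer)
	{
		uintptr_t p = (uintptr_t)&mb->rearm_data;

		*(uint64_t *)p = initializer;
	}

	/* New pattern: the accessor yields a uint64_t * over the same bytes,
	 * so the cast disappears.
	 */
	static inline void
	rearm_via_accessor(struct rte_mbuf *mb, uint64_t initializer)
	{
		*rte_mbuf_rearm_data(mb) = initializer;
	}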

Use rte_mbuf_prefetch_part2() to prefetch cacheline1 and remove the
reference to the RTE_MARKER cacheline1 field.
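
Not part of the patch itself: a sketch of the prefetch change.
rte_mbuf_prefetch_part2() is the existing rte_mbuf.h helper that prefetches
the mbuf's second cache line, so no RTE_MARKER cacheline1 access is needed.

	#include <rte_mbuf.h>

	static inline void
	prefetch_second_cacheline(struct rte_mbuf *mb)
	{
		/* Replaces rte_prefetch0(&mb->cacheline1); same prefetch,
		 * no RTE_MARKER access.
		 */
		rte_mbuf_prefetch_part2(mb);
	}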

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 drivers/net/i40e/i40e_rxtx_vec_altivec.c | 18 +++++--------
 drivers/net/i40e/i40e_rxtx_vec_avx2.c    | 34 ++++++-------------------
 drivers/net/i40e/i40e_rxtx_vec_avx512.c  | 35 +++++++-------------------
 drivers/net/i40e/i40e_rxtx_vec_common.h  |  4 +--
 drivers/net/i40e/i40e_rxtx_vec_neon.c    | 16 ++++++------
 drivers/net/i40e/i40e_rxtx_vec_sse.c     | 43 +++++++-------------------------
 6 files changed, 41 insertions(+), 109 deletions(-)
  

Patch

diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index b6b0d38..3e065ee 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -55,7 +55,6 @@ 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__vector unsigned long vaddr0, vaddr1;
-		uintptr_t p0, p1;
 
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
@@ -66,10 +65,8 @@ 
 		  * anyway. So overwrite whole 8 bytes with one load:
 		  * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 		  */
-		p0 = (uintptr_t)&mb0->rearm_data;
-		*(uint64_t *)p0 = rxq->mbuf_initializer;
-		p1 = (uintptr_t)&mb1->rearm_data;
-		*(uint64_t *)p1 = rxq->mbuf_initializer;
+		*rte_mbuf_rearm_data(mb0) = rxq->mbuf_initializer;
+		*rte_mbuf_rearm_data(mb1) = rxq->mbuf_initializer;
 
 		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
 		vaddr0 = vec_ld(0, (__vector unsigned long *)&mb0->buf_addr);
@@ -370,12 +367,10 @@ 
 
 		/* D.3 copy final 3,4 data to rx_pkts */
 		vec_st(pkt_mb4, 0,
-		 (__vector unsigned char *)&rx_pkts[pos + 3]
-			->rx_descriptor_fields1
+		 (__vector unsigned char *)rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 3])
 		);
 		vec_st(pkt_mb3, 0,
-		 (__vector unsigned char *)&rx_pkts[pos + 2]
-			->rx_descriptor_fields1
+		 (__vector unsigned char *)rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 2])
 		);
 
 		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
@@ -422,11 +417,10 @@ 
 
 		/* D.3 copy final 1,2 data to rx_pkts */
 		vec_st(pkt_mb2, 0,
-		 (__vector unsigned char *)&rx_pkts[pos + 1]
-			->rx_descriptor_fields1
+		 (__vector unsigned char *)rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 1])
 		);
 		vec_st(pkt_mb1, 0,
-		 (__vector unsigned char *)&rx_pkts[pos]->rx_descriptor_fields1
+		 (__vector unsigned char *)rte_mbuf_rx_descriptor_fields1(rx_pkts[pos])
 		);
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		desc_to_olflags_v(descs, &rx_pkts[pos]);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index f468c1f..360d80f 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -180,19 +180,6 @@ 
 			0xFF, 0xFF,  /* pkt_type set as unknown */
 			0xFF, 0xFF   /*pkt_type set as unknown */
 	);
-	/*
-	 * compile-time check the above crc and shuffle layout is correct.
-	 * NOTE: the first field (lowest address) is given last in set_epi
-	 * calls above.
-	 */
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
 
 	/* Status/Error flag masks */
 	/*
@@ -525,11 +512,6 @@ 
 		 * add in the previously computed rx_descriptor fields to
 		 * make a single 256-bit write per mbuf
 		 */
-		/* check the structure matches expectations */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-				offsetof(struct rte_mbuf, rearm_data) + 8);
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
-				RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
 		/* build up data and do writes */
 		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
 				rearm6, rearm7;
@@ -543,10 +525,10 @@ 
 		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
 		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
 		/* write to mbuf */
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data, rearm6);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data, rearm4);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data, rearm2);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data, rearm0);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 6]), rearm6);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 4]), rearm4);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 2]), rearm2);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 0]), rearm0);
 
 		/* repeat for the odd mbufs */
 		const __m256i odd_flags = _mm256_castsi128_si256(
@@ -561,10 +543,10 @@ 
 		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
 		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
 		/* again write to mbufs */
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data, rearm7);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data, rearm5);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data, rearm3);
-		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data, rearm1);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 7]), rearm7);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 5]), rearm5);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 3]), rearm3);
+		_mm256_storeu_si256((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 1]), rearm1);
 
 		/* extract and record EOP bit */
 		if (split_packet) {
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index f3050cd..e13bd2f 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -170,18 +170,6 @@ 
 			 /* pkt_type set as unknown */
 			 0xFFFFFFFF
 			);
-	/* compile-time check the above crc and shuffle layout is correct.
-	 * NOTE: the first field (lowest address) is given last in set_epi
-	 * calls above.
-	 */
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
 
 	/* Status/Error flag masks */
 	/* mask everything except RSS, flow director and VLAN flags
@@ -557,11 +545,6 @@ 
 		 * add in the previously computed rx_descriptor fields to
 		 * make a single 256-bit write per mbuf
 		 */
-		/* check the structure matches expectations */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-				offsetof(struct rte_mbuf, rearm_data) + 8);
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
-				RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
 		/* build up data and do writes */
 		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
 				rearm6, rearm7;
@@ -580,13 +563,13 @@ 
 		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
 		/* write to mbuf */
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 6]->rearm_data, rearm6);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 6]), rearm6);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 4]->rearm_data, rearm4);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 4]), rearm4);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 2]->rearm_data, rearm2);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 2]), rearm2);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 0]->rearm_data, rearm0);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 0]), rearm0);
 
 		/* repeat for the odd mbufs */
 		const __m256i odd_flags = _mm256_castsi128_si256
@@ -606,13 +589,13 @@ 
 		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
 		/* again write to mbufs */
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 7]->rearm_data, rearm7);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 7]), rearm7);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 5]->rearm_data, rearm5);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 5]), rearm5);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 3]->rearm_data, rearm3);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 3]), rearm3);
 		_mm256_storeu_si256
-			((__m256i *)&rx_pkts[i + 1]->rearm_data, rearm1);
+			((__m256i *)rte_mbuf_rearm_data(rx_pkts[i + 1]), rearm1);
 
 		/* extract and record EOP bit */
 		if (split_packet) {
@@ -826,7 +809,7 @@ 
 		free[0] = m;
 		nb_free = 1;
 		for (i = 1; i < n; i++) {
-			rte_prefetch0(&txep[i + 3].mbuf->cacheline1);
+			rte_mbuf_prefetch_part2(txep[i + 3].mbuf);
 			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
 			if (likely(m)) {
 				if (likely(m->pool == free[0]->pool)) {
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 8b74563..5633268 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -189,7 +189,6 @@ 
 static inline int
 i40e_rxq_vec_setup_default(struct i40e_rx_queue *rxq)
 {
-	uintptr_t p;
 	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
 
 	mb_def.nb_segs = 1;
@@ -199,8 +198,7 @@ 
 
 	/* prevent compiler reordering: rearm_data covers previous fields */
 	rte_compiler_barrier();
-	p = (uintptr_t)&mb_def.rearm_data;
-	rxq->mbuf_initializer = *(uint64_t *)p;
+	rxq->mbuf_initializer = *rte_mbuf_rearm_data(&mb_def);
 	rxq->rx_using_sse = 1;
 	return 0;
 }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index d873e30..29dfd92 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -300,10 +300,10 @@ 
 	rearm2 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 2), mbuf_init, 1);
 	rearm3 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 3), mbuf_init, 1);
 
-	vst1q_u64((uint64_t *)&rx_pkts[0]->rearm_data, rearm0);
-	vst1q_u64((uint64_t *)&rx_pkts[1]->rearm_data, rearm1);
-	vst1q_u64((uint64_t *)&rx_pkts[2]->rearm_data, rearm2);
-	vst1q_u64((uint64_t *)&rx_pkts[3]->rearm_data, rearm3);
+	vst1q_u64(rte_mbuf_rearm_data(rx_pkts[0]), rearm0);
+	vst1q_u64(rte_mbuf_rearm_data(rx_pkts[1]), rearm1);
+	vst1q_u64(rte_mbuf_rearm_data(rx_pkts[2]), rearm2);
+	vst1q_u64(rte_mbuf_rearm_data(rx_pkts[3]), rearm3);
 }
 
 #define PKTLEN_SHIFT     10
@@ -492,13 +492,13 @@ 
 		pkt_mb1 = vreinterpretq_u8_u16(tmp);
 
 		/* D.3 copy final data to rx_pkts */
-		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+		vst1q_u8(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 3]),
 				pkt_mb4);
-		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+		vst1q_u8(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 2]),
 				pkt_mb3);
-		vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+		vst1q_u8(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 1]),
 				pkt_mb2);
-		vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+		vst1q_u8(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos]),
 				pkt_mb1);
 
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 2d4480a..994c5e1 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -315,14 +315,10 @@ 
 	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(vlan0, 4), 0x10);
 
 	/* write the rearm data and the olflags in one write */
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
-			offsetof(struct rte_mbuf, rearm_data) + 8);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
-			RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
-	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
-	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
-	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
-	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+	_mm_store_si128((__m128i *)rte_mbuf_rearm_data(rx_pkts[0]), rearm0);
+	_mm_store_si128((__m128i *)rte_mbuf_rearm_data(rx_pkts[1]), rearm1);
+	_mm_store_si128((__m128i *)rte_mbuf_rearm_data(rx_pkts[2]), rearm2);
+	_mm_store_si128((__m128i *)rte_mbuf_rearm_data(rx_pkts[3]), rearm3);
 }
 
 #define PKTLEN_SHIFT     10
@@ -369,15 +365,7 @@ 
 				-rxq->crc_len, /* sub crc on pkt_len */
 				0, 0            /* ignore pkt_type field */
 			);
-	/*
-	 * compile-time check the above crc_adjust layout is correct.
-	 * NOTE: the first field (lowest address) is given last in set_epi16
-	 * call above.
-	 */
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
 	__m128i dd_check, eop_check;
 
 	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
@@ -419,19 +407,6 @@ 
 		0xFF, 0xFF,  /* pkt_type set as unknown */
 		0xFF, 0xFF  /*pkt_type set as unknown */
 		);
-	/*
-	 * Compile-time verify the shuffle mask
-	 * NOTE: some field positions already verified above, but duplicated
-	 * here for completeness in case of future modifications.
-	 */
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
-	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
-			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
 
 	/* Cache is empty -> need to scan the buffer rings, but first move
 	 * the next 'n' mbufs into the cache
@@ -535,9 +510,9 @@ 
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
 
 		/* D.3 copy final 3,4 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
+		_mm_storeu_si128(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 3]),
 				 pkt_mb4);
-		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
+		_mm_storeu_si128(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 2]),
 				 pkt_mb3);
 
 		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
@@ -571,9 +546,9 @@ 
 		staterr = _mm_packs_epi32(staterr, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
-		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
+		_mm_storeu_si128(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos + 1]),
 				 pkt_mb2);
-		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+		_mm_storeu_si128(rte_mbuf_rx_descriptor_fields1(rx_pkts[pos]),
 				 pkt_mb1);
 		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */