[dpdk-dev,RFC,07/14] ixgbe: rework vector pmd following mbuf changes

Message ID 1407789890-17355-8-git-send-email-bruce.richardson@intel.com (mailing list archive)
State RFC, archived
Headers

Commit Message

Bruce Richardson Aug. 11, 2014, 8:44 p.m. UTC
  The vector PMD expects fields to be in a specific order so that it can
do vector operations on multiple fields at a time. Following mbuf
rework, adjust driver to take account of the new layout and re-enable it
in the config.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 config/common_linuxapp                |   2 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c     |   2 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.h     |   2 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 150 ++++++++++++----------------------
 4 files changed, 55 insertions(+), 101 deletions(-)
  

Patch

diff --git a/config/common_linuxapp b/config/common_linuxapp
index b140af7..5bee910 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -191,7 +191,7 @@  CONFIG_RTE_LIBRTE_IXGBE_DEBUG_DRIVER=n
 CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n
 CONFIG_RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC=y
 CONFIG_RTE_LIBRTE_IXGBE_ALLOW_UNSUPPORTED_SFP=n
-CONFIG_RTE_IXGBE_INC_VECTOR=n
+CONFIG_RTE_IXGBE_INC_VECTOR=y
 CONFIG_RTE_IXGBE_RX_OLFLAGS_ENABLE=y
 
 #
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 036b9bf..1b0e272 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -1523,7 +1523,7 @@  ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		}
 
 		/* Prefetch data of first segment, if configured to do so. */
-		rte_packet_prefetch((char *)first_seg->buf_addr + 
+		rte_packet_prefetch((char *)first_seg->buf_addr +
 			first_seg->data_off);
 
 		/*
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
index 8d9476d..1861f18 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h
@@ -126,7 +126,7 @@  struct igb_rx_queue {
 #ifdef RTE_IXGBE_INC_VECTOR
 	uint16_t            rxrearm_nb; /**< the idx we start the re-arming from */
 	uint16_t            rxrearm_start;  /**< number of remaining to be re-armed */
-	__m128i             misc_info;  /**< cache XMM combine port_id/crc/nb_segs */
+	uint64_t            mbuf_initializer; /**< value to init mbufs */
 #endif
 	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
 	uint16_t            queue_id; /**< RX queue index. */
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index dd02cf5..780bf1e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -44,24 +44,28 @@ 
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-static struct rte_mbuf mb_def = {
 
-	.ol_flags = 0,
-	.data_len = 0,
-	.pkt_len = 0,
-
-	.vlan_macip = {
-		.data = 0,
-	},
-	.hash = {
-		.rss = 0,
-	},
+int
+ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq,
+			__rte_unused unsigned int socket_id)
+{
+	static struct rte_mbuf mb_def = {
+		.buf_len = 0,
+		.data_len = 0,
 
-	.nb_segs = 1,
-	.in_port = 0,
+		.data_off = RTE_PKTMBUF_HEADROOM,
+#ifdef RTE_MBUF_REFCNT
+		.refcnt = 1,
+#endif
+		.nb_segs = 1,
+		.port = 0
+	};
 
-	.next = NULL,
-};
+	mb_def.buf_len = rxq->mb_pool->elt_size - sizeof(struct rte_mbuf);
+	mb_def.port = rxq->port_id;
+	rxq->mbuf_initializer = *((uint64_t *)&mb_def.rearm_data);
+	return 0;
+}
 
 static inline void
 ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
@@ -71,7 +75,6 @@  ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
 	volatile union ixgbe_adv_rx_desc *rxdp;
 	struct igb_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
-	__m128i def_low;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
 
@@ -82,8 +85,6 @@  ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
-	def_low = _mm_load_si128((__m128i *)&(mb_def.next));
-
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		__m128i dma_addr0, dma_addr1;
@@ -92,33 +93,25 @@  ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
 
+		/* flush mbuf with pkt template */
+		mb0->rearm_data[0] = rxq->mbuf_initializer;
+		mb1->rearm_data[0] = rxq->mbuf_initializer;
+
 		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
 		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
 		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
 
-		/* calc va/pa of pkt data point */
-		vaddr0 = _mm_add_epi64(vaddr0, hdr_room);
-		vaddr1 = _mm_add_epi64(vaddr1, hdr_room);
-
 		/* convert pa to dma_addr hdr/data */
 		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
 		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
 
-		/* fill va into t0 def pkt template */
-		vaddr0 = _mm_unpacklo_epi64(def_low, vaddr0);
-		vaddr1 = _mm_unpacklo_epi64(def_low, vaddr1);
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
 
 		/* flush desc with pa dma_addr */
 		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
 		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
-
-		/* flush mbuf with pkt template */
-		_mm_store_si128((__m128i *)&mb0->next, vaddr0);
-		_mm_store_si128((__m128i *)&mb1->next, vaddr1);
-
-		/* update refcnt per pkt */
-		rte_mbuf_refcnt_set(mb0, 1);
-		rte_mbuf_refcnt_set(mb1, 1);
 	}
 
 	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
@@ -205,7 +198,13 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	int pos;
 	uint64_t var;
 	__m128i shuf_msk;
-	__m128i in_port;
+	__m128i crc_adjust = _mm_set_epi16(
+				0, 0, 0, 0, /* ignore non-length fields */
+				0,          /* ignore high-16bits of pkt_len */
+				-rxq->crc_len, /* sub crc on pkt_len */
+				-rxq->crc_len, /* sub crc on data_len */
+				0            /* ignore pkt_type field */
+			);
 	__m128i dd_check;
 
 	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
@@ -238,8 +237,8 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
 		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
 		13, 12,      /* octet 12~13, low 16 bits pkt_len */
-		0xFF, 0xFF,  /* skip nb_segs and in_port, zero out */
-		13, 12       /* octet 12~13, 16 bits data_len */
+		13, 12,      /* octet 12~13, 16 bits data_len */
+		0xFF, 0xFF   /* skip pkt_type field */
 		);
 
 
@@ -247,9 +246,6 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	 * the next 'n' mbufs into the cache */
 	sw_ring = &rxq->sw_ring[rxq->rx_tail];
 
-	/* in_port, nb_seg = 1, crc_len */
-	in_port = rxq->misc_info;
-
 	/*
 	 * A. load 4 packet in one loop
 	 * B. copy 4 mbuf point from swring to rx_pkts
@@ -301,8 +297,8 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		desc_to_olflags_v(descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, in_port);
-		pkt_mb3 = _mm_add_epi16(pkt_mb3, in_port);
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
 
 		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
 		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
@@ -313,23 +309,23 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
 
 		/* D.3 copy final 3,4 data to rx_pkts */
-		_mm_storeu_si128((__m128i *)&(rx_pkts[pos+3]->data_len),
+		_mm_storeu_si128(rx_pkts[pos+3]->rx_descriptor_fields1,
 				pkt_mb4);
-		_mm_storeu_si128((__m128i *)&(rx_pkts[pos+2]->data_len),
+		_mm_storeu_si128(rx_pkts[pos+2]->rx_descriptor_fields1,
 				pkt_mb3);
 
 		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, in_port);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, in_port);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 
 		/* C.3 calc avaialbe number of desc */
 		staterr = _mm_and_si128(staterr, dd_check);
 		staterr = _mm_packs_epi32(staterr, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
-		_mm_storeu_si128((__m128i *)&(rx_pkts[pos+1]->data_len),
+		_mm_storeu_si128(rx_pkts[pos+1]->rx_descriptor_fields1,
 				pkt_mb2);
-		_mm_storeu_si128((__m128i *)&(rx_pkts[pos]->data_len),
+		_mm_storeu_si128(rx_pkts[pos]->rx_descriptor_fields1,
 				pkt_mb1);
 
 		/* C.4 calc avaialbe number of desc */
@@ -346,46 +342,19 @@  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 	return nb_pkts_recd;
 }
-
 static inline void
 vtx1(volatile union ixgbe_adv_tx_desc *txdp,
-		struct rte_mbuf *pkt, __m128i flags)
+		struct rte_mbuf *pkt, uint64_t flags)
 {
-	__m128i t0, t1, offset, ols, ba, ctl;
-
-	/* load buf_addr/buf_physaddr in t0 */
-	t0 = _mm_loadu_si128((__m128i *)&(pkt->buf_addr));
-	/* load data, ... pkt_len in t1 */
-	t1 = _mm_loadu_si128((__m128i *)&(pkt->data));
-
-	/* calc offset = (data - buf_adr) */
-	offset = _mm_sub_epi64(t1, t0);
-
-	/* cmd_type_len: pkt_len |= DCMD_DTYP_FLAGS */
-	ctl = _mm_or_si128(t1, flags);
-
-	/* reorder as buf_physaddr/buf_addr */
-	offset = _mm_shuffle_epi32(offset, 0x4E);
-
-	/* olinfo_stats: pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT */
-	ols = _mm_slli_epi32(t1, IXGBE_ADVTXD_PAYLEN_SHIFT);
-
-	/* buffer_addr = buf_physaddr + offset */
-	ba = _mm_add_epi64(t0, offset);
-
-	/* format cmd_type_len/olinfo_status */
-	ctl = _mm_unpackhi_epi32(ctl, ols);
-
-	/* format buf_physaddr/cmd_type_len */
-	ba = _mm_unpackhi_epi64(ba, ctl);
-
-	/* write desc */
-	_mm_store_si128((__m128i *)&txdp->read, ba);
+	__m128i descriptor = _mm_set_epi64x((uint64_t)pkt->pkt_len << 46 |
+			flags | pkt->data_len,
+			pkt->buf_physaddr + pkt->data_off);
+	_mm_store_si128((__m128i *)&txdp->read, descriptor);
 }
 
 static inline void
 vtx(volatile union ixgbe_adv_tx_desc *txdp,
-		struct rte_mbuf **pkt, uint16_t nb_pkts,  __m128i flags)
+		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
 {
 	int i;
 	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
@@ -472,9 +441,8 @@  ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct igb_tx_entry_v *txep;
 	struct igb_tx_entry_seq *txsp;
 	uint16_t n, nb_commit, tx_id;
-	__m128i flags = _mm_set_epi32(DCMD_DTYP_FLAGS, 0, 0, 0);
-	__m128i rs = _mm_set_epi32(IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS,
-			0, 0, 0);
+	uint64_t flags = DCMD_DTYP_FLAGS;
+	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
 	int i;
 
 	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
@@ -653,21 +621,6 @@  int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,
 	return 0;
 }
 
-int ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq,
-			__rte_unused unsigned int socket_id)
-{
-	rxq->misc_info =
-		_mm_set_epi16(
-			0, 0, 0, 0, 0,
-			(uint16_t)-rxq->crc_len, /* sub crc on pkt_len */
-			(uint16_t)(rxq->port_id << 8 | 1),
-			/* 8b port_id and 8b nb_seg*/
-			(uint16_t)-rxq->crc_len  /* sub crc on data_len */
-			);
-
-	return 0;
-}
-
 int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev)
 {
 #ifndef RTE_LIBRTE_IEEE1588
@@ -699,3 +652,4 @@  int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev)
 	return -1;
 #endif
 }
+