[RFC] net/memif: add a fast path for Rx

Message ID 20220412093243.3670187-1-joyce.kong@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series [RFC] net/memif: add a fast path for Rx

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Joyce Kong April 12, 2022, 9:32 a.m. UTC
  For memif non-zero-copy mode, the Rx copy loop contains a branch
that compares the mbuf and memif buffer sizes on every copy. Both
sizes are fixed once the device and mempool are configured, so the
comparison can be hoisted out of the loop: add a fast memory copy
path that performs the check once per burst and, when it holds,
copies each descriptor with a single memcpy (see the sketch below).
Removing the per-copy branch yields a performance uplift.

When the mbuf size is greater than or equal to the memif buffer
size, Rx takes the fast memcpy path. Tested with 1p1q on an Ampere
Altra AArch64 server, this gives a 2.6% performance gain in
non-zero-copy mode and a 1.36% gain in zero-copy mode. Tested with
1p1q on a Cascade Lake Xeon x86 server, it gives a 3.04% gain in
non-zero-copy mode and a 0.27% gain in zero-copy mode.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/memif/rte_eth_memif.c | 124 ++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 40 deletions(-)
  

Patch

diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index 587ad45576..f55776ca46 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -342,66 +342,111 @@  eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		goto refill;
 	n_slots = last_slot - cur_slot;
 
-	while (n_slots && n_rx_pkts < nb_pkts) {
-		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
-		if (unlikely(mbuf_head == NULL))
-			goto no_free_bufs;
-		mbuf = mbuf_head;
-		mbuf->port = mq->in_port;
+	if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+		while (n_slots && n_rx_pkts < nb_pkts) {
+			mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+			if (unlikely(mbuf_head == NULL))
+				goto no_free_bufs;
+			mbuf = mbuf_head;
+			mbuf->port = mq->in_port;
+
+next_slot1:
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-next_slot:
-		s0 = cur_slot & mask;
-		d0 = &ring->desc[s0];
+			cp_len = d0->length;
 
-		src_len = d0->length;
-		dst_off = 0;
-		src_off = 0;
+			rte_pktmbuf_data_len(mbuf) = cp_len;
+			rte_pktmbuf_pkt_len(mbuf) = cp_len;
+			if (mbuf != mbuf_head)
+				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-		do {
-			dst_len = mbuf_size - dst_off;
-			if (dst_len == 0) {
-				dst_off = 0;
-				dst_len = mbuf_size;
+			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+				(uint8_t *)memif_get_buffer(proc_private, d0), cp_len);
+
+			cur_slot++;
+			n_slots--;
 
-				/* store pointer to tail */
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
 				mbuf_tail = mbuf;
 				mbuf = rte_pktmbuf_alloc(mq->mempool);
 				if (unlikely(mbuf == NULL))
 					goto no_free_bufs;
-				mbuf->port = mq->in_port;
 				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
 				if (unlikely(ret < 0)) {
 					MIF_LOG(ERR, "number-of-segments-overflow");
 					rte_pktmbuf_free(mbuf);
 					goto no_free_bufs;
 				}
+				goto next_slot1;
 			}
-			cp_len = RTE_MIN(dst_len, src_len);
 
-			rte_pktmbuf_data_len(mbuf) += cp_len;
-			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
-			if (mbuf != mbuf_head)
-				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			n_rx_pkts++;
+		}
+	} else {
+		while (n_slots && n_rx_pkts < nb_pkts) {
+			mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+			if (unlikely(mbuf_head == NULL))
+				goto no_free_bufs;
+			mbuf = mbuf_head;
+			mbuf->port = mq->in_port;
+
+next_slot2:
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-			rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
-							   dst_off),
-				(uint8_t *)memif_get_buffer(proc_private, d0) +
-				src_off, cp_len);
+			src_len = d0->length;
+			dst_off = 0;
+			src_off = 0;
 
-			src_off += cp_len;
-			dst_off += cp_len;
-			src_len -= cp_len;
-		} while (src_len);
+			do {
+				dst_len = mbuf_size - dst_off;
+				if (dst_len == 0) {
+					dst_off = 0;
+					dst_len = mbuf_size;
+
+					/* store pointer to tail */
+					mbuf_tail = mbuf;
+					mbuf = rte_pktmbuf_alloc(mq->mempool);
+					if (unlikely(mbuf == NULL))
+						goto no_free_bufs;
+					mbuf->port = mq->in_port;
+					ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+					if (unlikely(ret < 0)) {
+						MIF_LOG(ERR, "number-of-segments-overflow");
+						rte_pktmbuf_free(mbuf);
+						goto no_free_bufs;
+					}
+				}
+				cp_len = RTE_MIN(dst_len, src_len);
 
-		cur_slot++;
-		n_slots--;
+				rte_pktmbuf_data_len(mbuf) += cp_len;
+				rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+				if (mbuf != mbuf_head)
+					rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
-			goto next_slot;
+				rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+								   dst_off),
+					(uint8_t *)memif_get_buffer(proc_private, d0) +
+					src_off, cp_len);
 
-		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
-		*bufs++ = mbuf_head;
-		n_rx_pkts++;
+				src_off += cp_len;
+				dst_off += cp_len;
+				src_len -= cp_len;
+			} while (src_len);
+
+			cur_slot++;
+			n_slots--;
+
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+				goto next_slot2;
+
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			n_rx_pkts++;
+		}
 	}
 
 no_free_bufs:
@@ -694,7 +739,6 @@  eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return n_tx_pkts;
 }
 
-
 static int
 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
 		memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,