diff mbox series

[v2,2/2] net/memif: add a Tx fast path

Message ID 20220701102815.1444223-3-joyce.kong@arm.com (mailing list archive)
State New
Delegated to: Ferruh Yigit
Headers show
Series add a fast path for memif Rx/Tx | expand

Checks

Context Check Description
ci/iol-abi-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Joyce Kong July 1, 2022, 10:28 a.m. UTC
In memif non-zero-copy mode, the Tx path has a branch that compares
the mbuf and memif buffer sizes during memory copying. If all
mbufs come from the same mempool and the memif buffer size is >= the
mbuf size, use a fast Tx memory copy path without the comparison
branch and with bulk mbuf free; otherwise, run the original
Tx path.
The Tx fast path does not change memif's behavior of storing
mbufs.

Removing the branch and freeing mbufs in bulk lead to a considerable
performance uplift.

Tested with 1p1q on an Ampere Altra AArch64 server:
----------------------------------------------
|  buf size   | memif >= mbuf | memif < mbuf |
----------------------------------------------
| non-zc gain |    13.35%     |    -0.77%    |
----------------------------------------------
|  zc gain    |    17.15%     |    -0.47%    |
----------------------------------------------

Tested with 1p1q on a Cascade Lake Xeon x86 server:
----------------------------------------------
|  buf size   | memif >= mbuf | memif < mbuf |
----------------------------------------------
| non-zc gain |    10.10%     |    -0.29%    |
----------------------------------------------
|   zc gain   |     8.87%     |    -0.99%    |
----------------------------------------------

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/memif/rte_eth_memif.c | 134 ++++++++++++++++++++----------
 1 file changed, 92 insertions(+), 42 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index 24fc8b13fa..bafcfd5a7c 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -659,62 +659,112 @@  eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
 	}
 
-	while (n_tx_pkts < nb_pkts && n_free) {
-		mbuf_head = *bufs++;
-		nb_segs = mbuf_head->nb_segs;
-		mbuf = mbuf_head;
+	uint8_t i;
+	struct rte_mbuf **buf_tmp = bufs;
+	mbuf_head = *buf_tmp++;
+	struct rte_mempool *mp = mbuf_head->pool;
+
+	for (i = 1; i < nb_pkts; i++) {
+		mbuf_head = *buf_tmp++;
+		if (mbuf_head->pool != mp)
+			break;
+	}
+
+	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
+	if (i == nb_pkts && pmd->cfg.pkt_buffer_size >= mbuf_size) {
+		buf_tmp = bufs;
+		while (n_tx_pkts < nb_pkts && n_free) {
+			mbuf_head = *bufs++;
+			nb_segs = mbuf_head->nb_segs;
+			mbuf = mbuf_head;
 
-		saved_slot = slot;
-		d0 = &ring->desc[slot & mask];
-		dst_off = 0;
-		dst_len = (type == MEMIF_RING_C2S) ?
-			pmd->run.pkt_buffer_size : d0->length;
+			saved_slot = slot;
 
-next_in_chain:
-		src_off = 0;
-		src_len = rte_pktmbuf_data_len(mbuf);
+next_in_chain1:
+			d0 = &ring->desc[slot & mask];
+			cp_len = rte_pktmbuf_data_len(mbuf);
 
-		while (src_len) {
-			if (dst_len == 0) {
+			rte_memcpy((uint8_t *)memif_get_buffer(proc_private, d0),
+				rte_pktmbuf_mtod(mbuf, void *), cp_len);
+
+			d0->length = cp_len;
+			mq->n_bytes += cp_len;
+			slot++;
+			n_free--;
+
+			if (--nb_segs > 0) {
 				if (n_free) {
-					slot++;
-					n_free--;
 					d0->flags |= MEMIF_DESC_FLAG_NEXT;
-					d0 = &ring->desc[slot & mask];
-					dst_off = 0;
-					dst_len = (type == MEMIF_RING_C2S) ?
-					    pmd->run.pkt_buffer_size : d0->length;
-					d0->flags = 0;
+					mbuf = mbuf->next;
+					goto next_in_chain1;
 				} else {
 					slot = saved_slot;
-					goto no_free_slots;
+					goto free_mbufs;
 				}
 			}
-			cp_len = RTE_MIN(dst_len, src_len);
 
-			rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
-							       d0) + dst_off,
-				rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
-				cp_len);
+			n_tx_pkts++;
+		}
+free_mbufs:
+		rte_pktmbuf_free_bulk(buf_tmp, n_tx_pkts);
+	} else {
+		while (n_tx_pkts < nb_pkts && n_free) {
+			mbuf_head = *bufs++;
+			nb_segs = mbuf_head->nb_segs;
+			mbuf = mbuf_head;
 
-			mq->n_bytes += cp_len;
-			src_off += cp_len;
-			dst_off += cp_len;
-			src_len -= cp_len;
-			dst_len -= cp_len;
+			saved_slot = slot;
+			d0 = &ring->desc[slot & mask];
+			dst_off = 0;
+			dst_len = (type == MEMIF_RING_C2S) ?
+				pmd->run.pkt_buffer_size : d0->length;
 
-			d0->length = dst_off;
-		}
+next_in_chain2:
+			src_off = 0;
+			src_len = rte_pktmbuf_data_len(mbuf);
 
-		if (--nb_segs > 0) {
-			mbuf = mbuf->next;
-			goto next_in_chain;
-		}
+			while (src_len) {
+				if (dst_len == 0) {
+					if (n_free) {
+						slot++;
+						n_free--;
+						d0->flags |= MEMIF_DESC_FLAG_NEXT;
+						d0 = &ring->desc[slot & mask];
+						dst_off = 0;
+						dst_len = (type == MEMIF_RING_C2S) ?
+						    pmd->run.pkt_buffer_size : d0->length;
+						d0->flags = 0;
+					} else {
+						slot = saved_slot;
+						goto no_free_slots;
+					}
+				}
+				cp_len = RTE_MIN(dst_len, src_len);
 
-		n_tx_pkts++;
-		slot++;
-		n_free--;
-		rte_pktmbuf_free(mbuf_head);
+				rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
+								       d0) + dst_off,
+					rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
+					cp_len);
+
+				mq->n_bytes += cp_len;
+				src_off += cp_len;
+				dst_off += cp_len;
+				src_len -= cp_len;
+				dst_len -= cp_len;
+
+				d0->length = dst_off;
+			}
+
+			if (--nb_segs > 0) {
+				mbuf = mbuf->next;
+				goto next_in_chain2;
+			}
+
+			n_tx_pkts++;
+			slot++;
+			n_free--;
+			rte_pktmbuf_free(mbuf_head);
+		}
 	}
 
 no_free_slots: