[RFC] net/memif: add a fast path for Rx
Checks
Commit Message
In memif non-zero-copy mode, the Rx copy loop contains a branch that
compares the mbuf size with the memif buffer size on every copy. Add
a fast memory copy path that removes this branch from the loop by
comparing the mbuf and memif buffer sizes once, before packets are
processed. Removing the branch leads to a performance uplift.
When the mbuf size is >= the memif buffer size, Rx takes the fast
memcpy path. Tested with 1p1q on an Ampere Altra AArch64 server, there
is a 2.6% perf gain in non-zero-copy mode and a 1.36% perf gain in
zero-copy mode. Tested with 1p1q on a Cascade Lake Xeon x86 server,
there is a 3.04% perf gain in non-zero-copy mode and a 0.27% perf gain
in zero-copy mode.
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
drivers/net/memif/rte_eth_memif.c | 124 ++++++++++++++++++++----------
1 file changed, 84 insertions(+), 40 deletions(-)
@@ -342,66 +342,111 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
goto refill;
n_slots = last_slot - cur_slot;
- while (n_slots && n_rx_pkts < nb_pkts) {
- mbuf_head = rte_pktmbuf_alloc(mq->mempool);
- if (unlikely(mbuf_head == NULL))
- goto no_free_bufs;
- mbuf = mbuf_head;
- mbuf->port = mq->in_port;
+ if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+ while (n_slots && n_rx_pkts < nb_pkts) {
+ mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+ if (unlikely(mbuf_head == NULL))
+ goto no_free_bufs;
+ mbuf = mbuf_head;
+ mbuf->port = mq->in_port;
+
+next_slot1:
+ s0 = cur_slot & mask;
+ d0 = &ring->desc[s0];
-next_slot:
- s0 = cur_slot & mask;
- d0 = &ring->desc[s0];
+ cp_len = d0->length;
- src_len = d0->length;
- dst_off = 0;
- src_off = 0;
+ rte_pktmbuf_data_len(mbuf) = cp_len;
+ rte_pktmbuf_pkt_len(mbuf) = cp_len;
+ if (mbuf != mbuf_head)
+ rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
- do {
- dst_len = mbuf_size - dst_off;
- if (dst_len == 0) {
- dst_off = 0;
- dst_len = mbuf_size;
+ rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+ (uint8_t *)memif_get_buffer(proc_private, d0), cp_len);
+
+ cur_slot++;
+ n_slots--;
- /* store pointer to tail */
+ if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
mbuf_tail = mbuf;
mbuf = rte_pktmbuf_alloc(mq->mempool);
if (unlikely(mbuf == NULL))
goto no_free_bufs;
- mbuf->port = mq->in_port;
ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
if (unlikely(ret < 0)) {
MIF_LOG(ERR, "number-of-segments-overflow");
rte_pktmbuf_free(mbuf);
goto no_free_bufs;
}
+ goto next_slot1;
}
- cp_len = RTE_MIN(dst_len, src_len);
- rte_pktmbuf_data_len(mbuf) += cp_len;
- rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
- if (mbuf != mbuf_head)
- rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+ mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+ *bufs++ = mbuf_head;
+ n_rx_pkts++;
+ }
+ } else {
+ while (n_slots && n_rx_pkts < nb_pkts) {
+ mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+ if (unlikely(mbuf_head == NULL))
+ goto no_free_bufs;
+ mbuf = mbuf_head;
+ mbuf->port = mq->in_port;
+
+next_slot2:
+ s0 = cur_slot & mask;
+ d0 = &ring->desc[s0];
- rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
- dst_off),
- (uint8_t *)memif_get_buffer(proc_private, d0) +
- src_off, cp_len);
+ src_len = d0->length;
+ dst_off = 0;
+ src_off = 0;
- src_off += cp_len;
- dst_off += cp_len;
- src_len -= cp_len;
- } while (src_len);
+ do {
+ dst_len = mbuf_size - dst_off;
+ if (dst_len == 0) {
+ dst_off = 0;
+ dst_len = mbuf_size;
+
+ /* store pointer to tail */
+ mbuf_tail = mbuf;
+ mbuf = rte_pktmbuf_alloc(mq->mempool);
+ if (unlikely(mbuf == NULL))
+ goto no_free_bufs;
+ mbuf->port = mq->in_port;
+ ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+ if (unlikely(ret < 0)) {
+ MIF_LOG(ERR, "number-of-segments-overflow");
+ rte_pktmbuf_free(mbuf);
+ goto no_free_bufs;
+ }
+ }
+ cp_len = RTE_MIN(dst_len, src_len);
- cur_slot++;
- n_slots--;
+ rte_pktmbuf_data_len(mbuf) += cp_len;
+ rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+ if (mbuf != mbuf_head)
+ rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
- if (d0->flags & MEMIF_DESC_FLAG_NEXT)
- goto next_slot;
+ rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+ dst_off),
+ (uint8_t *)memif_get_buffer(proc_private, d0) +
+ src_off, cp_len);
- mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
- *bufs++ = mbuf_head;
- n_rx_pkts++;
+ src_off += cp_len;
+ dst_off += cp_len;
+ src_len -= cp_len;
+ } while (src_len);
+
+ cur_slot++;
+ n_slots--;
+
+ if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+ goto next_slot2;
+
+ mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+ *bufs++ = mbuf_head;
+ n_rx_pkts++;
+ }
}
no_free_bufs:
@@ -694,7 +739,6 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return n_tx_pkts;
}
-
static int
memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,