From patchwork Tue May 17 10:51:08 2022
X-Patchwork-Submitter: Joyce Kong
X-Patchwork-Id: 111239
X-Patchwork-Delegate: ferruh.yigit@amd.com
From: Joyce Kong
To: Jakub Grajciar
Cc: ruifeng.wang@arm.com, dev@dpdk.org, nd@arm.com, Joyce Kong
Subject: [PATCH v1 1/2] net/memif: add a Rx fast path
Date: Tue, 17 May 2022 10:51:08 +0000
Message-Id: <20220517105109.1086090-2-joyce.kong@arm.com>
In-Reply-To: <20220517105109.1086090-1-joyce.kong@arm.com>
References: <20220412093243.3670187-1-joyce.kong@arm.com>
 <20220517105109.1086090-1-joyce.kong@arm.com>
List-Id: DPDK patches and discussions

For memif non-zero-copy mode, there is a branch comparing the mbuf and
memif buffer sizes during memory copying. Add a fast memory copy path
by removing this branch, with the mbuf and memif buffer sizes known at
setup time. The removal of the branch leads to considerable performance
uplift.

When the memif buffer size is less than or equal to the mbuf size,
Rx takes the fast memcpy path; otherwise it takes the original path.
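The shape of the change can be sketched in standalone C (illustrative
only, not part of the patch: copy_generic()/copy_fast() are invented
names, and the flat dst buffer stands in for a chain of mbufs):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Original path: re-compute the remaining mbuf room on every
	 * iteration and split the descriptor across segments as needed. */
	static size_t copy_generic(uint8_t *dst, size_t mbuf_room,
				   const uint8_t *src, size_t src_len)
	{
		size_t src_off = 0, dst_off = 0, copied = 0;

		do {
			size_t dst_len = mbuf_room - dst_off;

			if (dst_len == 0) {
				/* the driver would chain a fresh mbuf here */
				dst_off = 0;
				dst_len = mbuf_room;
			}
			size_t cp_len = src_len < dst_len ? src_len : dst_len;

			memcpy(dst + copied, src + src_off, cp_len);
			src_off += cp_len;
			dst_off += cp_len;
			src_len -= cp_len;
			copied += cp_len;
		} while (src_len);

		return copied;
	}

	/* Fast path: the mbuf room is known up front to be at least the
	 * memif buffer size, so the per-segment branch disappears. */
	static size_t copy_fast(uint8_t *dst, const uint8_t *src, size_t src_len)
	{
		memcpy(dst, src, src_len);
		return src_len;
	}

	int main(void)
	{
		uint8_t src[128], dst[256];

		memset(src, 0xab, sizeof(src));
		/* 64-byte "mbufs": the generic loop splits the copy in two */
		printf("generic: %zu bytes\n",
		       copy_generic(dst, 64, src, sizeof(src)));
		/* room >= src_len: a single memcpy suffices */
		printf("fast: %zu bytes\n", copy_fast(dst, src, sizeof(src)));
		return 0;
	}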
Test with 1p1q on Ampere Altra AArch64 server,
 --------------------------------------------
  buf size    | memif <= mbuf | memif > mbuf |
 --------------------------------------------
  non-zc gain |     4.30%     |    -0.52%    |
 --------------------------------------------
  zc gain     |     2.46%     |     0.70%    |
 --------------------------------------------

Test with 1p1q on Cascade Lake Xeon x86 server,
 -------------------------------------------
  buf size    | memif <= mbuf | memif > mbuf |
 -------------------------------------------
  non-zc gain |     2.13%     |    -1.40%    |
 -------------------------------------------
  zc gain     |     0.18%     |     0.48%    |
 -------------------------------------------

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
---
 drivers/net/memif/rte_eth_memif.c | 124 ++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 40 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index 587ad45576..f55776ca46 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -342,66 +342,111 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		goto refill;
 	n_slots = last_slot - cur_slot;
 
-	while (n_slots && n_rx_pkts < nb_pkts) {
-		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
-		if (unlikely(mbuf_head == NULL))
-			goto no_free_bufs;
-		mbuf = mbuf_head;
-		mbuf->port = mq->in_port;
+	if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+		while (n_slots && n_rx_pkts < nb_pkts) {
+			mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+			if (unlikely(mbuf_head == NULL))
+				goto no_free_bufs;
+			mbuf = mbuf_head;
+			mbuf->port = mq->in_port;
+
+next_slot1:
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-next_slot:
-		s0 = cur_slot & mask;
-		d0 = &ring->desc[s0];
+			cp_len = d0->length;
 
-		src_len = d0->length;
-		dst_off = 0;
-		src_off = 0;
+			rte_pktmbuf_data_len(mbuf) = cp_len;
+			rte_pktmbuf_pkt_len(mbuf) = cp_len;
+			if (mbuf != mbuf_head)
+				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-		do {
-			dst_len = mbuf_size - dst_off;
-			if (dst_len == 0) {
-				dst_off = 0;
-				dst_len = mbuf_size;
+			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+				(uint8_t *)memif_get_buffer(proc_private, d0), cp_len);
+
+			cur_slot++;
+			n_slots--;
 
-				/* store pointer to tail */
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
 				mbuf_tail = mbuf;
 				mbuf = rte_pktmbuf_alloc(mq->mempool);
 				if (unlikely(mbuf == NULL))
 					goto no_free_bufs;
-				mbuf->port = mq->in_port;
 				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
 				if (unlikely(ret < 0)) {
 					MIF_LOG(ERR, "number-of-segments-overflow");
 					rte_pktmbuf_free(mbuf);
 					goto no_free_bufs;
 				}
+				goto next_slot1;
 			}
-			cp_len = RTE_MIN(dst_len, src_len);
 
-			rte_pktmbuf_data_len(mbuf) += cp_len;
-			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
-			if (mbuf != mbuf_head)
-				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			n_rx_pkts++;
+		}
+	} else {
+		while (n_slots && n_rx_pkts < nb_pkts) {
+			mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+			if (unlikely(mbuf_head == NULL))
+				goto no_free_bufs;
+			mbuf = mbuf_head;
+			mbuf->port = mq->in_port;
+
+next_slot2:
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-			rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
-					dst_off),
-				(uint8_t *)memif_get_buffer(proc_private, d0) +
-				src_off, cp_len);
+			src_len = d0->length;
+			dst_off = 0;
+			src_off = 0;
 
-			src_off += cp_len;
-			dst_off += cp_len;
-			src_len -= cp_len;
-		} while (src_len);
+			do {
+				dst_len = mbuf_size - dst_off;
+				if (dst_len == 0) {
+					dst_off = 0;
+					dst_len = mbuf_size;
+
+					/* store pointer to tail */
+					mbuf_tail = mbuf;
+					mbuf = rte_pktmbuf_alloc(mq->mempool);
+					if (unlikely(mbuf == NULL))
+						goto no_free_bufs;
+					mbuf->port = mq->in_port;
+					ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+					if (unlikely(ret < 0)) {
+						MIF_LOG(ERR, "number-of-segments-overflow");
+						rte_pktmbuf_free(mbuf);
+						goto no_free_bufs;
+					}
+				}
+				cp_len = RTE_MIN(dst_len, src_len);
 
-		cur_slot++;
-		n_slots--;
+				rte_pktmbuf_data_len(mbuf) += cp_len;
+				rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+				if (mbuf != mbuf_head)
+					rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
-			goto next_slot;
+				rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+						dst_off),
+					(uint8_t *)memif_get_buffer(proc_private, d0) +
+					src_off, cp_len);
 
-		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
-		*bufs++ = mbuf_head;
-		n_rx_pkts++;
+				src_off += cp_len;
+				dst_off += cp_len;
+				src_len -= cp_len;
+			} while (src_len);
+
+			cur_slot++;
+			n_slots--;
+
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+				goto next_slot2;
+
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			n_rx_pkts++;
+		}
 	}
 
 no_free_bufs:
@@ -694,7 +739,6 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return n_tx_pkts;
 }
 
-
 static int
 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
 		memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
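
Usage note (not part of the patch): per the memif guide, the bsize
devarg sets the memif packet buffer size (pmd->cfg.pkt_buffer_size),
so each Rx path can be exercised from testpmd. Core masks and vdev
names below are placeholders, and the default mbuf data room assumed
here is testpmd's 2048 bytes:

	# memif buffer (2048B) fits the default mbuf data room: fast Rx path
	dpdk-testpmd -l 0-1 --vdev=net_memif0,role=server,bsize=2048 -- -i

	# memif buffer larger than the mbuf data room: original Rx path
	dpdk-testpmd -l 2-3 --vdev=net_memif0,role=client,bsize=4096 -- -i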