From patchwork Fri Sep 11 12:09:04 2020
X-Patchwork-Submitter: Joyce Kong
X-Patchwork-Id: 77384
X-Patchwork-Delegate: maxime.coquelin@redhat.com
From: Joyce Kong
To: maxime.coquelin@redhat.com
Cc: jerinj@marvell.com, dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com, phil.yang@arm.com
Date: Fri, 11 Sep 2020 20:09:04 +0800
Message-Id: <20200911120906.45995-2-joyce.kong@arm.com>
In-Reply-To: <20200911120906.45995-1-joyce.kong@arm.com>
References: <20200911120906.45995-1-joyce.kong@arm.com>
Subject: [dpdk-dev] [RFC 1/3] net/virtio: move AVX based Rx and Tx code to separate file

Split out AVX instruction based virtio packed ring Rx and Tx implementation to a separate file.
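The new virtio_rxtx_packed.c relies on the linker's weak-symbol rules: it carries __rte_weak stubs for the vectorized Rx/Tx entry points so every build links, while an architecture-specific object (the AVX file here, a NEON file later in this series) provides strong definitions that take precedence. A minimal two-file sketch of the pattern, with illustrative names rather than the driver's own and plain __attribute__((weak)) in place of DPDK's __rte_weak macro:

/* fallback.c - always compiled; the weak stub keeps the link working
 * when no vectorized implementation is built in.
 */
#include <stdint.h>
#include <stdio.h>

__attribute__((weak)) uint16_t
xmit_pkts_vec(void *txq, void **pkts, uint16_t nb)
{
	/* the driver's stubs call rte_panic() instead of returning */
	(void)txq; (void)pkts; (void)nb;
	fprintf(stderr, "no vectorized path linked in\n");
	return 0;
}

/* xmit_avx.c - compiled only when the ISA is available; this strong
 * definition overrides the weak stub at link time.
 */
uint16_t
xmit_pkts_vec(void *txq, void **pkts, uint16_t nb)
{
	uint16_t sent = nb;	/* placeholder for the real vectorized Tx */
	(void)txq; (void)pkts;
	return sent;
}

When both objects are linked, callers resolve to the strong (AVX) definition; dropping the arch-specific file from the build falls back to the stub, which is why the driver's stubs panic loudly rather than silently return zero packets.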
Signed-off-by: Phil Yang --- drivers/net/virtio/meson.build | 1 + drivers/net/virtio/virtio_rxtx_packed.c | 37 +++ drivers/net/virtio/virtio_rxtx_packed.h | 284 ++++++++++++++++++++ drivers/net/virtio/virtio_rxtx_packed_avx.c | 264 +----------------- 4 files changed, 323 insertions(+), 263 deletions(-) create mode 100644 drivers/net/virtio/virtio_rxtx_packed.c create mode 100644 drivers/net/virtio/virtio_rxtx_packed.h diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build index 3fd6051f4..e1851b0a6 100644 --- a/drivers/net/virtio/meson.build +++ b/drivers/net/virtio/meson.build @@ -5,6 +5,7 @@ sources += files('virtio_ethdev.c', 'virtio_pci.c', 'virtio_rxtx.c', 'virtio_rxtx_simple.c', + 'virtio_rxtx_packed.c', 'virtqueue.c') deps += ['kvargs', 'bus_pci'] diff --git a/drivers/net/virtio/virtio_rxtx_packed.c b/drivers/net/virtio/virtio_rxtx_packed.c new file mode 100644 index 000000000..e614e19fc --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx_packed.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtio_rxtx_packed.h" +#include "virtqueue.h" + +/* Stub for linkage when arch specific implementation is not available */ +__rte_weak uint16_t +virtio_xmit_pkts_packed_vec(void *tx_queue __rte_unused, + struct rte_mbuf **tx_pkts __rte_unused, + uint16_t nb_pkts __rte_unused) +{ + rte_panic("Wrong weak function linked by linker\n"); + return 0; +} + +/* Stub for linkage when arch specific implementation is not available */ +__rte_weak uint16_t +virtio_recv_pkts_packed_vec(void *rx_queue __rte_unused, + struct rte_mbuf **rx_pkts __rte_unused, + uint16_t nb_pkts __rte_unused) +{ + rte_panic("Wrong weak function linked by linker\n"); + return 0; +} diff --git a/drivers/net/virtio/virtio_rxtx_packed.h b/drivers/net/virtio/virtio_rxtx_packed.h new file mode 100644 index 000000000..b2447843b --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx_packed.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#ifndef _VIRTIO_RXTX_PACKED_H_ +#define _VIRTIO_RXTX_PACKED_H_ + +#include +#include +#include +#include +#include + +#include + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtqueue.h" + +#define BYTE_SIZE 8 +/* flag bits offset in packed ring desc higher 64bits */ +#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \ + offsetof(struct vring_packed_desc, len)) * BYTE_SIZE) + +#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \ + FLAGS_BITS_OFFSET) + +/* reference count offset in mbuf rearm data */ +#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \ + offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE) +/* segment number offset in mbuf rearm data */ +#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \ + offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE) + +/* default rearm data */ +#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \ + 1ULL << REFCNT_BITS_OFFSET) + +/* id bits offset in packed ring desc higher 64bits */ +#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \ + offsetof(struct vring_packed_desc, len)) * BYTE_SIZE) + +/* net hdr short size mask */ +#define NET_HDR_MASK 0x3F + +#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \ + sizeof(struct 
vring_packed_desc)) +#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1) + +#ifdef VIRTIO_GCC_UNROLL_PRAGMA +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \ + for (iter = val; iter < size; iter++) +#endif + +#ifdef VIRTIO_CLANG_UNROLL_PRAGMA +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \ + for (iter = val; iter < size; iter++) +#endif + +#ifdef VIRTIO_ICC_UNROLL_PRAGMA +#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \ + for (iter = val; iter < size; iter++) +#endif + +#ifndef virtio_for_each_try_unroll +#define virtio_for_each_try_unroll(iter, val, num) \ + for (iter = val; iter < num; iter++) +#endif + +static inline void +virtio_update_batch_stats(struct virtnet_stats *stats, + uint16_t pkt_len1, + uint16_t pkt_len2, + uint16_t pkt_len3, + uint16_t pkt_len4) +{ + stats->bytes += pkt_len1; + stats->bytes += pkt_len2; + stats->bytes += pkt_len3; + stats->bytes += pkt_len4; +} + +static inline int +virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq, + struct rte_mbuf *txm) +{ + struct virtqueue *vq = txvq->vq; + struct virtio_hw *hw = vq->hw; + uint16_t hdr_size = hw->vtnet_hdr_size; + uint16_t slots, can_push; + int16_t need; + + /* How many main ring entries are needed to this Tx? + * any_layout => number of segments + * default => number of segments + 1 + */ + can_push = rte_mbuf_refcnt_read(txm) == 1 && + RTE_MBUF_DIRECT(txm) && + txm->nb_segs == 1 && + rte_pktmbuf_headroom(txm) >= hdr_size; + + slots = txm->nb_segs + !can_push; + need = slots - vq->vq_free_cnt; + + /* Positive value indicates it need free vring descriptors */ + if (unlikely(need > 0)) { + virtio_xmit_cleanup_inorder_packed(vq, need); + need = slots - vq->vq_free_cnt; + if (unlikely(need > 0)) { + PMD_TX_LOG(ERR, + "No free tx descriptors to transmit"); + return -1; + } + } + + /* Enqueue Packet buffers */ + virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push, 1); + + txvq->stats.bytes += txm->pkt_len; + return 0; +} + +/* Optionally fill offload information in structure */ +static inline int +virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) +{ + struct rte_net_hdr_lens hdr_lens; + uint32_t hdrlen, ptype; + int l4_supported = 0; + + /* nothing to do */ + if (hdr->flags == 0) + return 0; + + /* GSO not support in vec path, skip check */ + m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; + + ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); + m->packet_type = ptype; + if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) + l4_supported = 1; + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; + if (hdr->csum_start <= hdrlen && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_NONE; + } else { + /* Unknown proto or tunnel, do sw cksum. We can assume + * the cksum field is in the first segment since the + * buffers we provided to the host are large enough. + * In case of SCTP, this will be wrong since it's a CRC + * but there's nothing we can do. 
+ */ + uint16_t csum = 0, off; + + rte_raw_cksum_mbuf(m, hdr->csum_start, + rte_pktmbuf_pkt_len(m) - hdr->csum_start, + &csum); + if (likely(csum != 0xffff)) + csum = ~csum; + off = hdr->csum_offset + hdr->csum_start; + if (rte_pktmbuf_data_len(m) >= off + 1) + *rte_pktmbuf_mtod_offset(m, uint16_t *, + off) = csum; + } + } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + return 0; +} + +static inline uint16_t +virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq, + struct rte_mbuf **rx_pkts) +{ + uint16_t used_idx, id; + uint32_t len; + struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw = vq->hw; + uint32_t hdr_size = hw->vtnet_hdr_size; + struct virtio_net_hdr *hdr; + struct vring_packed_desc *desc; + struct rte_mbuf *cookie; + + desc = vq->vq_packed.ring.desc; + used_idx = vq->vq_used_cons_idx; + if (!desc_is_used(&desc[used_idx], vq)) + return -1; + + len = desc[used_idx].len; + id = desc[used_idx].id; + cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie; + if (unlikely(cookie == NULL)) { + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", + vq->vq_used_cons_idx); + return -1; + } + rte_prefetch0(cookie); + rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); + + cookie->data_off = RTE_PKTMBUF_HEADROOM; + cookie->ol_flags = 0; + cookie->pkt_len = (uint32_t)(len - hdr_size); + cookie->data_len = (uint32_t)(len - hdr_size); + + hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + if (hw->has_rx_offload) + virtio_vec_rx_offload(cookie, hdr); + + *rx_pkts = cookie; + + rxvq->stats.bytes += cookie->pkt_len; + + vq->vq_free_cnt++; + vq->vq_used_cons_idx++; + if (vq->vq_used_cons_idx >= vq->vq_nentries) { + vq->vq_used_cons_idx -= vq->vq_nentries; + vq->vq_packed.used_wrap_counter ^= 1; + } + + return 0; +} + +static inline void +virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq, + struct rte_mbuf **cookie, + uint16_t num) +{ + struct virtqueue *vq = rxvq->vq; + struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc; + uint16_t flags = vq->vq_packed.cached_flags; + struct virtio_hw *hw = vq->hw; + struct vq_desc_extra *dxp; + uint16_t idx, i; + uint16_t batch_num, total_num = 0; + uint16_t head_idx = vq->vq_avail_idx; + uint16_t head_flag = vq->vq_packed.cached_flags; + uint64_t addr; + + do { + idx = vq->vq_avail_idx; + + batch_num = PACKED_BATCH_SIZE; + if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries)) + batch_num = vq->vq_nentries - idx; + if (unlikely((total_num + batch_num) > num)) + batch_num = num - total_num; + + virtio_for_each_try_unroll(i, 0, batch_num) { + dxp = &vq->vq_descx[idx + i]; + dxp->cookie = (void *)cookie[total_num + i]; + + addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) + + RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; + start_dp[idx + i].addr = addr; + start_dp[idx + i].len = cookie[total_num + i]->buf_len + - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; + if (total_num || i) { + virtqueue_store_flags_packed(&start_dp[idx + i], + flags, hw->weak_barriers); + } + } + + vq->vq_avail_idx += batch_num; + if (vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->vq_packed.cached_flags ^= + VRING_PACKED_DESC_F_AVAIL_USED; + flags = vq->vq_packed.cached_flags; + } + total_num += batch_num; + } while (total_num < num); + + virtqueue_store_flags_packed(&start_dp[head_idx], head_flag, + hw->weak_barriers); + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); +} + +#endif /* 
_VIRTIO_RXTX_PACKED_H_ */ diff --git a/drivers/net/virtio/virtio_rxtx_packed_avx.c b/drivers/net/virtio/virtio_rxtx_packed_avx.c index 6a8214725..c8fbb8f2c 100644 --- a/drivers/net/virtio/virtio_rxtx_packed_avx.c +++ b/drivers/net/virtio/virtio_rxtx_packed_avx.c @@ -13,71 +13,9 @@ #include "virtio_logs.h" #include "virtio_ethdev.h" #include "virtio_pci.h" +#include "virtio_rxtx_packed.h" #include "virtqueue.h" -#define BYTE_SIZE 8 -/* flag bits offset in packed ring desc higher 64bits */ -#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \ - offsetof(struct vring_packed_desc, len)) * BYTE_SIZE) - -#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \ - FLAGS_BITS_OFFSET) - -/* reference count offset in mbuf rearm data */ -#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \ - offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE) -/* segment number offset in mbuf rearm data */ -#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \ - offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE) - -/* default rearm data */ -#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \ - 1ULL << REFCNT_BITS_OFFSET) - -/* id bits offset in packed ring desc higher 64bits */ -#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \ - offsetof(struct vring_packed_desc, len)) * BYTE_SIZE) - -/* net hdr short size mask */ -#define NET_HDR_MASK 0x3F - -#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \ - sizeof(struct vring_packed_desc)) -#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1) - -#ifdef VIRTIO_GCC_UNROLL_PRAGMA -#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \ - for (iter = val; iter < size; iter++) -#endif - -#ifdef VIRTIO_CLANG_UNROLL_PRAGMA -#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \ - for (iter = val; iter < size; iter++) -#endif - -#ifdef VIRTIO_ICC_UNROLL_PRAGMA -#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \ - for (iter = val; iter < size; iter++) -#endif - -#ifndef virtio_for_each_try_unroll -#define virtio_for_each_try_unroll(iter, val, num) \ - for (iter = val; iter < num; iter++) -#endif - -static inline void -virtio_update_batch_stats(struct virtnet_stats *stats, - uint16_t pkt_len1, - uint16_t pkt_len2, - uint16_t pkt_len3, - uint16_t pkt_len4) -{ - stats->bytes += pkt_len1; - stats->bytes += pkt_len2; - stats->bytes += pkt_len3; - stats->bytes += pkt_len4; -} - static inline int virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq, struct rte_mbuf **tx_pkts) @@ -200,46 +138,6 @@ virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq, return 0; } -static inline int -virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq, - struct rte_mbuf *txm) -{ - struct virtqueue *vq = txvq->vq; - struct virtio_hw *hw = vq->hw; - uint16_t hdr_size = hw->vtnet_hdr_size; - uint16_t slots, can_push; - int16_t need; - - /* How many main ring entries are needed to this Tx? 
- * any_layout => number of segments - * default => number of segments + 1 - */ - can_push = rte_mbuf_refcnt_read(txm) == 1 && - RTE_MBUF_DIRECT(txm) && - txm->nb_segs == 1 && - rte_pktmbuf_headroom(txm) >= hdr_size; - - slots = txm->nb_segs + !can_push; - need = slots - vq->vq_free_cnt; - - /* Positive value indicates it need free vring descriptors */ - if (unlikely(need > 0)) { - virtio_xmit_cleanup_inorder_packed(vq, need); - need = slots - vq->vq_free_cnt; - if (unlikely(need > 0)) { - PMD_TX_LOG(ERR, - "No free tx descriptors to transmit"); - return -1; - } - } - - /* Enqueue Packet buffers */ - virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push, 1); - - txvq->stats.bytes += txm->pkt_len; - return 0; -} - uint16_t virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -293,58 +191,6 @@ virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts, return nb_tx; } -/* Optionally fill offload information in structure */ -static inline int -virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) -{ - struct rte_net_hdr_lens hdr_lens; - uint32_t hdrlen, ptype; - int l4_supported = 0; - - /* nothing to do */ - if (hdr->flags == 0) - return 0; - - /* GSO not support in vec path, skip check */ - m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; - - ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); - m->packet_type = ptype; - if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || - (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || - (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) - l4_supported = 1; - - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; - if (hdr->csum_start <= hdrlen && l4_supported) { - m->ol_flags |= PKT_RX_L4_CKSUM_NONE; - } else { - /* Unknown proto or tunnel, do sw cksum. We can assume - * the cksum field is in the first segment since the - * buffers we provided to the host are large enough. - * In case of SCTP, this will be wrong since it's a CRC - * but there's nothing we can do. 
- */ - uint16_t csum = 0, off; - - rte_raw_cksum_mbuf(m, hdr->csum_start, - rte_pktmbuf_pkt_len(m) - hdr->csum_start, - &csum); - if (likely(csum != 0xffff)) - csum = ~csum; - off = hdr->csum_offset + hdr->csum_start; - if (rte_pktmbuf_data_len(m) >= off + 1) - *rte_pktmbuf_mtod_offset(m, uint16_t *, - off) = csum; - } - } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { - m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; - } - - return 0; -} - static inline uint16_t virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq, struct rte_mbuf **rx_pkts) @@ -445,114 +291,6 @@ virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq, return 0; } -static uint16_t -virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq, - struct rte_mbuf **rx_pkts) -{ - uint16_t used_idx, id; - uint32_t len; - struct virtqueue *vq = rxvq->vq; - struct virtio_hw *hw = vq->hw; - uint32_t hdr_size = hw->vtnet_hdr_size; - struct virtio_net_hdr *hdr; - struct vring_packed_desc *desc; - struct rte_mbuf *cookie; - - desc = vq->vq_packed.ring.desc; - used_idx = vq->vq_used_cons_idx; - if (!desc_is_used(&desc[used_idx], vq)) - return -1; - - len = desc[used_idx].len; - id = desc[used_idx].id; - cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie; - if (unlikely(cookie == NULL)) { - PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", - vq->vq_used_cons_idx); - return -1; - } - rte_prefetch0(cookie); - rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); - - cookie->data_off = RTE_PKTMBUF_HEADROOM; - cookie->ol_flags = 0; - cookie->pkt_len = (uint32_t)(len - hdr_size); - cookie->data_len = (uint32_t)(len - hdr_size); - - hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr + - RTE_PKTMBUF_HEADROOM - hdr_size); - if (hw->has_rx_offload) - virtio_vec_rx_offload(cookie, hdr); - - *rx_pkts = cookie; - - rxvq->stats.bytes += cookie->pkt_len; - - vq->vq_free_cnt++; - vq->vq_used_cons_idx++; - if (vq->vq_used_cons_idx >= vq->vq_nentries) { - vq->vq_used_cons_idx -= vq->vq_nentries; - vq->vq_packed.used_wrap_counter ^= 1; - } - - return 0; -} - -static inline void -virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq, - struct rte_mbuf **cookie, - uint16_t num) -{ - struct virtqueue *vq = rxvq->vq; - struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc; - uint16_t flags = vq->vq_packed.cached_flags; - struct virtio_hw *hw = vq->hw; - struct vq_desc_extra *dxp; - uint16_t idx, i; - uint16_t batch_num, total_num = 0; - uint16_t head_idx = vq->vq_avail_idx; - uint16_t head_flag = vq->vq_packed.cached_flags; - uint64_t addr; - - do { - idx = vq->vq_avail_idx; - - batch_num = PACKED_BATCH_SIZE; - if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries)) - batch_num = vq->vq_nentries - idx; - if (unlikely((total_num + batch_num) > num)) - batch_num = num - total_num; - - virtio_for_each_try_unroll(i, 0, batch_num) { - dxp = &vq->vq_descx[idx + i]; - dxp->cookie = (void *)cookie[total_num + i]; - - addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) + - RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; - start_dp[idx + i].addr = addr; - start_dp[idx + i].len = cookie[total_num + i]->buf_len - - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; - if (total_num || i) { - virtqueue_store_flags_packed(&start_dp[idx + i], - flags, hw->weak_barriers); - } - } - - vq->vq_avail_idx += batch_num; - if (vq->vq_avail_idx >= vq->vq_nentries) { - vq->vq_avail_idx -= vq->vq_nentries; - vq->vq_packed.cached_flags ^= - VRING_PACKED_DESC_F_AVAIL_USED; - flags = vq->vq_packed.cached_flags; - } - total_num += batch_num; - } while 
(total_num < num); - - virtqueue_store_flags_packed(&start_dp[head_idx], head_flag, - hw->weak_barriers); - vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); -} - uint16_t virtio_recv_pkts_packed_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
From patchwork Fri Sep 11 12:09:05 2020
X-Patchwork-Submitter: Joyce Kong
X-Patchwork-Id: 77385
X-Patchwork-Delegate: maxime.coquelin@redhat.com
From: Joyce Kong
To: maxime.coquelin@redhat.com
Cc: jerinj@marvell.com, dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com, phil.yang@arm.com
Date: Fri, 11 Sep 2020 20:09:05 +0800
Message-Id: <20200911120906.45995-3-joyce.kong@arm.com>
In-Reply-To: <20200911120906.45995-1-joyce.kong@arm.com>
References: <20200911120906.45995-1-joyce.kong@arm.com>
Subject: [dpdk-dev] [RFC 2/3] net/virtio: add vectorized packed ring Rx NEON path

Optimize packed ring Rx batch path with NEON instructions.
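The batch dequeue below converts each packed descriptor straight into the mbuf's rx_descriptor_fields1 with one table-lookup shuffle (shuf_msk1/shuf_msk2) and one vector subtract (len_adjust). Per descriptor this is roughly equivalent to the scalar helper sketched here (an illustrative helper, not part of the patch; desc_len stands for the 32-bit len field of the packed descriptor):

#include <stdint.h>
#include <rte_mbuf.h>

/* Scalar view of what the NEON shuffle plus subtract compute: take the
 * low 16 bits of the descriptor length, strip the virtio-net header,
 * store the result as pkt_len and data_len, and leave packet_type and
 * vlan_tci as "unknown" (zero).
 */
static inline void
desc_len_to_mbuf(struct rte_mbuf *m, uint32_t desc_len, uint16_t hdr_size)
{
	uint16_t len = (uint16_t)desc_len - hdr_size;

	m->packet_type = 0;
	m->pkt_len = len;
	m->data_len = len;
	m->vlan_tci = 0;
}

The vector path performs this for four descriptors at a time, writes each result with a single 16-byte store at rx_descriptor_fields1, and checks the avail/used bits of all four descriptors with one vector compare before accepting the batch.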
Signed-off-by: Joyce Kong --- drivers/net/virtio/meson.build | 1 + drivers/net/virtio/virtio_rxtx.c | 7 +- drivers/net/virtio/virtio_rxtx_packed.h | 16 ++ drivers/net/virtio/virtio_rxtx_packed_neon.c | 202 +++++++++++++++++++ 4 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 drivers/net/virtio/virtio_rxtx_packed_neon.c diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build index e1851b0a6..5af633686 100644 --- a/drivers/net/virtio/meson.build +++ b/drivers/net/virtio/meson.build @@ -34,6 +34,7 @@ elif arch_subdir == 'ppc' sources += files('virtio_rxtx_simple_altivec.c') elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64') sources += files('virtio_rxtx_simple_neon.c') + sources += files('virtio_rxtx_packed_neon.c') endif if is_linux diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index f915b8a2c..1deb77569 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -2020,7 +2020,8 @@ virtio_xmit_pkts_inorder(void *tx_queue, return nb_tx; } -#ifndef CC_AVX512_SUPPORT +#if !defined(CC_AVX512_SUPPORT) && !defined(RTE_ARCH_ARM) && \ + !defined(RTE_ARCH_ARM64) uint16_t virtio_recv_pkts_packed_vec(void *rx_queue __rte_unused, struct rte_mbuf **rx_pkts __rte_unused, @@ -2028,7 +2029,9 @@ virtio_recv_pkts_packed_vec(void *rx_queue __rte_unused, { return 0; } +#endif +#if !defined(CC_AVX512_SUPPORT) uint16_t virtio_xmit_pkts_packed_vec(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts __rte_unused, @@ -2036,4 +2039,4 @@ virtio_xmit_pkts_packed_vec(void *tx_queue __rte_unused, { return 0; } -#endif /* ifndef CC_AVX512_SUPPORT */ +#endif diff --git a/drivers/net/virtio/virtio_rxtx_packed.h b/drivers/net/virtio/virtio_rxtx_packed.h index b2447843b..fd2d6baa5 100644 --- a/drivers/net/virtio/virtio_rxtx_packed.h +++ b/drivers/net/virtio/virtio_rxtx_packed.h @@ -19,9 +19,16 @@ #include "virtqueue.h" #define BYTE_SIZE 8 + +#if defined(AVX512_SUPPORT) /* flag bits offset in packed ring desc higher 64bits */ #define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \ offsetof(struct vring_packed_desc, len)) * BYTE_SIZE) +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +/* flag bits offset in packed ring desc 32bits */ +#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \ + offsetof(struct vring_packed_desc, id)) * BYTE_SIZE) +#endif #define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \ FLAGS_BITS_OFFSET) @@ -44,8 +51,17 @@ /* net hdr short size mask */ #define NET_HDR_MASK 0x3F +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +/* The cache line size on different aarh64 platforms are + * different, so put a four batch size here to match with + * the minimum cache line size. 
+ */ +#define PACKED_BATCH_SIZE 4 +#else #define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \ sizeof(struct vring_packed_desc)) +#endif + #define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1) #ifdef VIRTIO_GCC_UNROLL_PRAGMA diff --git a/drivers/net/virtio/virtio_rxtx_packed_neon.c b/drivers/net/virtio/virtio_rxtx_packed_neon.c new file mode 100644 index 000000000..182afe5c6 --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx_packed_neon.c @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtio_rxtx_packed.h" +#include "virtqueue.h" + +static inline uint16_t +virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq, + struct rte_mbuf **rx_pkts) +{ + struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw = vq->hw; + uint16_t head_size = hw->vtnet_hdr_size; + uint16_t id = vq->vq_used_cons_idx; + struct vring_packed_desc *p_desc; + uint16_t i; + + if (id & PACKED_BATCH_MASK) + return -1; + + if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries)) + return -1; + + /* Map packed descriptor to mbuf fields. */ + uint8x16_t shuf_msk1 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* pkt_type set as unknown */ + 0, 1, /* octet 1~0, low 16 bits pkt_len */ + 0xFF, 0xFF, /* skip high 16 bits of pkt_len, zero out */ + 0, 1, /* octet 1~0, 16 bits data_len */ + 0xFF, 0xFF, /* vlan tci set as unknown */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + uint8x16_t shuf_msk2 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* pkt_type set as unknown */ + 8, 9, /* octet 9~8, low 16 bits pkt_len */ + 0xFF, 0xFF, /* skip high 16 bits of pkt_len, zero out */ + 8, 9, /* octet 9~8, 16 bits data_len */ + 0xFF, 0xFF, /* vlan tci set as unknown */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + /* Subtract the header length. */ + uint16x8_t len_adjust = { + 0, 0, /* ignore pkt_type field */ + head_size, /* sub head_size on pkt_len */ + 0, /* ignore high 16 bits of pkt_len */ + head_size, /* sub head_size on data_len */ + 0, 0, 0 /* ignore non-length fields */ + }; + + uint64x2_t desc[PACKED_BATCH_SIZE / 2]; + uint64x2x2_t mbp[PACKED_BATCH_SIZE / 2]; + uint64x2_t pkt_mb[PACKED_BATCH_SIZE]; + + p_desc = &vq->vq_packed.ring.desc[id]; + /* Load packed descriptor 0,1. */ + desc[0] = vld2q_u64((uint64_t *)(p_desc)).val[1]; + /* Load packed descriptor 2,3. */ + desc[1] = vld2q_u64((uint64_t *)(p_desc + 2)).val[1]; + + /* Only care avail/used bits. */ + uint32x4_t v_mask = vdupq_n_u32(PACKED_FLAGS_MASK); + uint32x4_t v_desc = vuzp2q_u32(vreinterpretq_u32_u64(desc[0]), + vreinterpretq_u32_u64(desc[1])); + uint32x4_t v_flag = vandq_u32(v_desc, v_mask); + + uint32x4_t v_used_flag = vdupq_n_u32(0); + if (vq->vq_packed.used_wrap_counter) + v_used_flag = vdupq_n_u32(PACKED_FLAGS_MASK); + + poly128_t desc_stats = vreinterpretq_p128_u32(vceqq_u32(v_flag, + v_used_flag)); + + /* Check all descs are used. */ + if (!desc_stats) + return -1; + + /* Load 2 mbuf pointers per time. */ + mbp[0] = vld2q_u64((uint64_t *)&vq->vq_descx[id]); + vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0].val[0]); + + mbp[1] = vld2q_u64((uint64_t *)&vq->vq_descx[id + 2]); + vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1].val[0]); + + /** + * Update data length and packet length for descriptor. 
+ * structure of pkt_mb: + * -------------------------------------------------------------------- + * |4 octet pkt_type|4 octet pkt_len|2 octet data_len|2 octet vlan_tci| + * -------------------------------------------------------------------- + */ + pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8(vreinterpretq_u8_u64(desc[0]), shuf_msk1)); + pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8(vreinterpretq_u8_u64(desc[0]), shuf_msk2)); + pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8(vreinterpretq_u8_u64(desc[1]), shuf_msk1)); + pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8(vreinterpretq_u8_u64(desc[1]), shuf_msk2)); + + pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16(vreinterpretq_u16_u64(pkt_mb[0]), len_adjust)); + pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16(vreinterpretq_u16_u64(pkt_mb[1]), len_adjust)); + pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16(vreinterpretq_u16_u64(pkt_mb[2]), len_adjust)); + pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16(vreinterpretq_u16_u64(pkt_mb[3]), len_adjust)); + + vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1, pkt_mb[0]); + vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1, pkt_mb[1]); + vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1, pkt_mb[2]); + vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1, pkt_mb[3]); + + if (hw->has_rx_offload) { + virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + char *addr = (char *)rx_pkts[i]->buf_addr + + RTE_PKTMBUF_HEADROOM - head_size; + virtio_vec_rx_offload(rx_pkts[i], + (struct virtio_net_hdr *)addr); + } + } + + virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len, + rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len, + rx_pkts[3]->pkt_len); + + vq->vq_free_cnt += PACKED_BATCH_SIZE; + + vq->vq_used_cons_idx += PACKED_BATCH_SIZE; + if (vq->vq_used_cons_idx >= vq->vq_nentries) { + vq->vq_used_cons_idx -= vq->vq_nentries; + vq->vq_packed.used_wrap_counter ^= 1; + } + + return 0; +} + +uint16_t +virtio_recv_pkts_packed_vec(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw = vq->hw; + uint16_t num, nb_rx = 0; + uint32_t nb_enqueued = 0; + uint16_t free_cnt = vq->vq_free_thresh; + + if (unlikely(hw->started == 0)) + return nb_rx; + + num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts); + if (likely(num > PACKED_BATCH_SIZE)) + num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE); + + while (num) { + if (!virtqueue_dequeue_batch_packed_vec(rxvq, + &rx_pkts[nb_rx])) { + nb_rx += PACKED_BATCH_SIZE; + num -= PACKED_BATCH_SIZE; + continue; + } + if (!virtqueue_dequeue_single_packed_vec(rxvq, + &rx_pkts[nb_rx])) { + nb_rx++; + num--; + continue; + } + break; + }; + + PMD_RX_LOG(DEBUG, "dequeue:%d", num); + + rxvq->stats.packets += nb_rx; + + if (likely(vq->vq_free_cnt >= free_cnt)) { + struct rte_mbuf *new_pkts[free_cnt]; + if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, + free_cnt) == 0)) { + virtio_recv_refill_packed_vec(rxvq, new_pkts, + free_cnt); + nb_enqueued += free_cnt; + } else { + struct rte_eth_dev *dev = + &rte_eth_devices[rxvq->port_id]; + dev->data->rx_mbuf_alloc_failed += free_cnt; + } + } + + if (likely(nb_enqueued)) { + if (unlikely(virtqueue_kick_prepare_packed(vq))) { + virtqueue_notify(vq); + PMD_RX_LOG(DEBUG, "Notified"); + } + } + + return nb_rx; +} From patchwork Fri Sep 11 12:09:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Joyce Kong X-Patchwork-Id: 77386 X-Patchwork-Delegate: maxime.coquelin@redhat.com 
Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 31F1FA04B5; Fri, 11 Sep 2020 14:09:45 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 5F23F1C115; Fri, 11 Sep 2020 14:09:28 +0200 (CEST) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by dpdk.org (Postfix) with ESMTP id 61AC91C115 for ; Fri, 11 Sep 2020 14:09:26 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id F10AA13A1; Fri, 11 Sep 2020 05:09:25 -0700 (PDT) Received: from net-arm-thunderx2-03.shanghai.arm.com (net-arm-thunderx2-03.shanghai.arm.com [10.169.210.123]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 7C6043F68F; Fri, 11 Sep 2020 05:09:23 -0700 (PDT) From: Joyce Kong To: maxime.coquelin@redhat.com Cc: jerinj@marvell.com, dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com, phil.yang@arm.com Date: Fri, 11 Sep 2020 20:09:06 +0800 Message-Id: <20200911120906.45995-4-joyce.kong@arm.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200911120906.45995-1-joyce.kong@arm.com> References: <20200911120906.45995-1-joyce.kong@arm.com> MIME-Version: 1.0 Subject: [dpdk-dev] [RFC 3/3] net/virtio: add election for packed vector Rx NEON path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add NEON vectorized path selection logic. Default setting comes from vectorized devarg, then checks each criteria. Packed ring vectorized neon path need: NEON is supported by compiler and host VERSION_1 and IN_ORDER features are negotiated mergeable feature is not negotiated LRO offloading is disabled Signed-off-by: Joyce Kong --- doc/guides/nics/virtio.rst | 4 ++-- drivers/net/virtio/virtio_ethdev.c | 19 +++++++++++++++---- drivers/net/virtio/virtio_user_ethdev.c | 2 ++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/guides/nics/virtio.rst b/doc/guides/nics/virtio.rst index 0daf25b22..fe9586699 100644 --- a/doc/guides/nics/virtio.rst +++ b/doc/guides/nics/virtio.rst @@ -483,8 +483,8 @@ according to below configuration: #. Packed virtqueue in-order non-mergeable path: If in-order feature is negotiated and Rx mergeable is not negotiated, this path will be selected. #. Packed virtqueue vectorized Rx path: If building and running environment support - AVX512 && in-order feature is negotiated && Rx mergeable is not negotiated && - TCP_LRO Rx offloading is disabled && vectorized option enabled, + (AVX512 || ARCH_ARM || ARCH_ARM64) && in-order feature is negotiated && Rx mergeable + is not negotiated && TCP_LRO Rx offloading is disabled && vectorized option enabled, this path will be selected. #. 
Packed virtqueue vectorized Tx path: If building and running environment support AVX512 && in-order feature is negotiated && vectorized option enabled, diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index dc0093bdf..b36ea98cf 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1958,12 +1958,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) if (!vtpci_packed_queue(hw)) { hw->use_vec_rx = 1; } else { -#if !defined(CC_AVX512_SUPPORT) - PMD_DRV_LOG(INFO, - "building environment do not support packed ring vectorized"); -#else +#if defined(CC_AVX512_SUPPORT) hw->use_vec_rx = 1; hw->use_vec_tx = 1; +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) + hw->use_vec_rx = 1; +#else + PMD_DRV_LOG(INFO, + "building environment do not support packed ring vectorized"); #endif } } @@ -2311,6 +2313,15 @@ virtio_dev_configure(struct rte_eth_dev *dev) hw->use_vec_rx = 0; hw->use_vec_tx = 0; } +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) + if (hw->use_vec_rx && + (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) || + !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) || + !vtpci_with_feature(hw, VIRTIO_F_VERSION_1))) { + PMD_DRV_LOG(INFO, + "disabled packed ring vectorized path for requirements not met"); + hw->use_vec_rx = 0; + } #else hw->use_vec_rx = 0; hw->use_vec_tx = 0; diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 6003f6d50..1cfeb388f 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -766,6 +766,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev) #if defined(CC_AVX512_SUPPORT) hw->use_vec_rx = 1; hw->use_vec_tx = 1; +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) + hw->use_vec_rx = 1; #else PMD_INIT_LOG(INFO, "building environment do not support packed ring vectorized");