From patchwork Tue Jul 18 15:40:21 2017
X-Patchwork-Submitter: Tiwei Bie
X-Patchwork-Id: 27038
From: Tiwei Bie
To: dev@dpdk.org, jfreiman@redhat.com
Date: Tue, 18 Jul 2017 23:40:21 +0800
Message-Id: <1500392421-76672-7-git-send-email-tiwei.bie@intel.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1500392421-76672-1-git-send-email-tiwei.bie@intel.com>
References: <1500392421-76672-1-git-send-email-tiwei.bie@intel.com>
Subject: [dpdk-dev] [RFC 6/6] virtio1.1: introduce the DESC_WB flag
List-Id: DPDK patches and discussions

This patch introduces the DESC_WB flag and adopts it in the virtio Tx
path. The flag reduces the number of memory writes the backend has to
perform on the descriptor ring, which in turn can reduce the PCIe
transactions when the backend is implemented in hardware.

A DESC_WB-style mechanism is normally paired with a head/tail index
design. This prototype does not use head/tail indexes, so the backend
still decides whether a descriptor is available by checking its DESC_HW
flag. The frontend therefore has to take some care when setting
DESC_WB; otherwise the backend may see descriptors whose DESC_HW flag
is set but which are not actually available to it.

Signed-off-by: Tiwei Bie
---
 drivers/net/virtio/virtio-1.1.h      |  1 +
 drivers/net/virtio/virtio_rxtx.c     | 14 +++++++++
 drivers/net/virtio/virtio_rxtx_1.1.c | 55 +++++++++++++++++++++++++-----------
 drivers/net/virtio/virtqueue.h       |  2 ++
 lib/librte_vhost/virtio-1.1.h        |  1 +
 lib/librte_vhost/virtio_net.c        | 37 +++++++++++++++---------
 6 files changed, 79 insertions(+), 31 deletions(-)
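The idea above, condensed into a standalone sketch before the actual
patch. This is illustrative only, not the driver code in the diff below:
the DESC_HW/DESC_WB values and the power-of-two rs_thresh assumption
come from this patch, but the struct layout beyond addr and the helper
names are assumptions, and the patch itself tracks the write-back
position with a vq_nb_used counter rather than the raw index used here.

#include <stdbool.h>
#include <stdint.h>

#define DESC_HW 0x0080
#define DESC_WB 0x0100

struct vring_desc_1_1 {          /* field layout past 'addr' is assumed */
	uint64_t addr;
	uint32_t len;
	uint16_t index;
	uint16_t flags;
};

/*
 * Frontend: post one descriptor, requesting a write-back only on every
 * rs_thresh-th slot (rs_thresh assumed to be a power of two), so the
 * backend has to touch at most one descriptor per batch.
 */
void
post_desc(struct vring_desc_1_1 *ring, uint16_t ring_size, uint16_t idx,
	  uint16_t rs_thresh, uint64_t addr, uint32_t len)
{
	uint16_t flags = DESC_HW;

	if ((idx & (rs_thresh - 1)) == (uint16_t)(rs_thresh - 1))
		flags |= DESC_WB;  /* ask the backend to clear this one */

	ring[idx & (ring_size - 1)].addr = addr;
	ring[idx & (ring_size - 1)].len = len;
	ring[idx & (ring_size - 1)].flags = flags;
}

/*
 * Frontend cleanup: check only the slot where a write-back was
 * requested; if its DESC_HW bit has been cleared by the backend, the
 * whole batch of rs_thresh descriptors ending there can be reclaimed.
 */
bool
batch_done(const struct vring_desc_1_1 *ring, uint16_t ring_size,
	   uint16_t last_cleaned, uint16_t rs_thresh)
{
	uint16_t wb_slot = (last_cleaned + rs_thresh) & (ring_size - 1);

	return (ring[wb_slot].flags & DESC_HW) == 0;
}

With this scheme the backend only ever clears DESC_HW on roughly one
descriptor out of every rs_thresh, which is where the saving in
descriptor-ring writes (and PCIe transactions for a hardware backend)
comes from.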
diff --git a/drivers/net/virtio/virtio-1.1.h b/drivers/net/virtio/virtio-1.1.h
index 48cbb18..bd76761 100644
--- a/drivers/net/virtio/virtio-1.1.h
+++ b/drivers/net/virtio/virtio-1.1.h
@@ -8,6 +8,7 @@
 #define BATCH_NOT_FIRST 0x0010
 #define BATCH_NOT_LAST  0x0020
 #define DESC_HW         0x0080
+#define DESC_WB         0x0100
 
 struct vring_desc_1_1 {
 	uint64_t addr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 7be9aae..0248638 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -153,6 +153,10 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
 #define DEFAULT_TX_FREE_THRESH 32
 #endif
 
+#ifndef DEFAULT_TX_RS_THRESH
+#define DEFAULT_TX_RS_THRESH 32
+#endif
+
 /* Cleanup from completed transmits. */
 static void
 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
@@ -434,6 +438,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
 	struct virtnet_tx *txvq;
 	uint16_t tx_free_thresh;
+	uint16_t tx_rs_thresh;
 	uint16_t desc_idx;
 
 	PMD_INIT_FUNC_TRACE();
@@ -451,6 +456,9 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	if (tx_free_thresh == 0)
 		tx_free_thresh = RTE_MIN(vq->vq_nentries / 4,
 					 DEFAULT_TX_FREE_THRESH);
+	tx_rs_thresh = tx_conf->tx_rs_thresh;
+	if (tx_rs_thresh == 0)
+		tx_rs_thresh = DEFAULT_TX_RS_THRESH;
 
 	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
 		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
@@ -461,7 +469,13 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		return -EINVAL;
 	}
 
+	if (!rte_is_power_of_2(tx_free_thresh)) {
+		RTE_LOG(ERR, PMD, "tx_free_thresh is not a power of 2");
+		return -EINVAL;
+	}
+
 	vq->vq_free_thresh = tx_free_thresh;
+	vq->vq_rs_thresh = tx_rs_thresh;
 
 	if (hw->use_simple_rxtx) {
 		uint16_t mid_idx = vq->vq_nentries >> 1;
diff --git a/drivers/net/virtio/virtio_rxtx_1.1.c b/drivers/net/virtio/virtio_rxtx_1.1.c
index 883a027..38f6a4a 100644
--- a/drivers/net/virtio/virtio_rxtx_1.1.c
+++ b/drivers/net/virtio/virtio_rxtx_1.1.c
@@ -63,21 +63,28 @@
 #include "virtio_rxtx.h"
 
 /* Cleanup from completed transmits. */
-static void
+static inline int
 virtio_xmit_cleanup(struct virtqueue *vq)
 {
-	uint16_t idx;
-	uint16_t size = vq->vq_nentries;
+	uint16_t clean_to, idx;
+	uint16_t mask = vq->vq_nentries - 1;
 	struct vring_desc_1_1 *desc = vq->vq_ring.desc_1_1;
+	uint16_t last_cleaned = vq->vq_used_cons_idx - 1;
 
-	idx = vq->vq_used_cons_idx & (size - 1);
-	while ((desc[idx].flags & DESC_HW) == 0) {
-		idx = (++vq->vq_used_cons_idx) & (size - 1);
-		vq->vq_free_cnt++;
-
-		if (vq->vq_free_cnt >= size)
-			break;
+	clean_to = last_cleaned + vq->vq_rs_thresh;
+	if ((desc[clean_to & mask].flags & DESC_HW) != 0) {
+		PMD_TX_LOG(DEBUG, "TX descriptor %d is not done",
+			   clean_to & mask);
+		return -1;
 	}
+
+	for (idx = last_cleaned + 2; idx < clean_to; idx++)
+		desc[idx & mask].flags &= ~DESC_HW;
+
+	vq->vq_used_cons_idx = clean_to + 1;
+	vq->vq_free_cnt += vq->vq_rs_thresh;
+
+	return 0;
 }
 
 uint16_t
@@ -90,6 +97,7 @@ virtio_xmit_pkts_1_1(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts
 	struct vring_desc_1_1 *desc = vq->vq_ring.desc_1_1;
 	uint16_t idx;
 	struct vq_desc_extra *dxp;
+	uint16_t nb_needed, nb_used = vq->vq_nb_used;
 
 	if (unlikely(nb_pkts < 1))
 		return nb_pkts;
@@ -103,19 +111,23 @@ virtio_xmit_pkts_1_1(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts
 		struct rte_mbuf *txm = tx_pkts[i];
 		struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
 
-		if (unlikely(txm->nb_segs + 1 > vq->vq_free_cnt)) {
-			virtio_xmit_cleanup(vq);
+		nb_needed = txm->nb_segs + 1;
 
-			if (unlikely(txm->nb_segs + 1 > vq->vq_free_cnt)) {
-				PMD_TX_LOG(ERR,
-					   "No free tx descriptors to transmit");
-				break;
+		if (unlikely(nb_needed > vq->vq_free_cnt)) {
+			if (unlikely(virtio_xmit_cleanup(vq) != 0))
+				goto end_of_tx;
+
+			if (unlikely(nb_needed > vq->vq_rs_thresh)) {
+				while (nb_needed > vq->vq_free_cnt) {
+					if (virtio_xmit_cleanup(vq) != 0)
+						goto end_of_tx;
+				}
 			}
 		}
 
 		txvq->stats.bytes += txm->pkt_len;
 
-		vq->vq_free_cnt -= txm->nb_segs + 1;
+		vq->vq_free_cnt -= nb_needed;
 
 		idx = (vq->vq_avail_idx++) & (vq->vq_nentries - 1);
 		dxp = &vq->vq_descx[idx];
@@ -129,22 +141,31 @@ virtio_xmit_pkts_1_1(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts
 		desc[idx].flags = VRING_DESC_F_NEXT;
 		if (i != 0)
 			desc[idx].flags |= DESC_HW;
+		nb_used = (nb_used + 1) & ~vq->vq_rs_thresh;
+		if (nb_used == 0 || nb_used == 1)
+			desc[idx].flags |= DESC_WB;
 
 		do {
 			idx = (vq->vq_avail_idx++) & (vq->vq_nentries - 1);
 			desc[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(txm, vq);
 			desc[idx].len = txm->data_len;
 			desc[idx].flags = DESC_HW | VRING_DESC_F_NEXT;
+			nb_used = (nb_used + 1) & ~vq->vq_rs_thresh;
+			if (nb_used == 0 || nb_used == 1)
+				desc[idx].flags |= DESC_WB;
 		} while ((txm = txm->next) != NULL);
 
 		desc[idx].flags &= ~VRING_DESC_F_NEXT;
 	}
 
+end_of_tx:
 	if (likely(i)) {
 		rte_smp_wmb();
 		vq->vq_ring.desc_1_1[head_idx & (vq->vq_nentries - 1)].flags |=
 			DESC_HW;
 	}
 
+	vq->vq_nb_used = nb_used;
+
 	txvq->stats.packets += i;
 	txvq->stats.errors += nb_pkts - i;
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 45f49d7..b104afa 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -194,6 +194,8 @@ struct virtqueue {
 	uint16_t vq_free_cnt;	 /**< num of desc available */
 	uint16_t vq_avail_idx;	 /**< sync until needed */
 	uint16_t vq_free_thresh; /**< free threshold */
+	uint16_t vq_rs_thresh;	 /**< RS threshold */
+	uint16_t vq_nb_used;	 /**< num of unreported desc */
 
 	void *vq_ring_virt_mem;	 /**< linear address of vring*/
 	unsigned int vq_ring_size;
diff --git a/lib/librte_vhost/virtio-1.1.h b/lib/librte_vhost/virtio-1.1.h
index 4241d0a..91e21f1 100644
--- a/lib/librte_vhost/virtio-1.1.h
+++ b/lib/librte_vhost/virtio-1.1.h
@@ -12,6 +12,7 @@
 #define BATCH_NOT_FIRST 0x0010
 #define BATCH_NOT_LAST  0x0020
 #define DESC_HW         0x0080
+#define DESC_WB         0x0100
 
 struct vring_desc_1_1 {
 	__le64 addr;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 08d53d9..96b84e5 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1100,7 +1100,8 @@ mbuf_is_consumed(struct rte_mbuf *m)
 static inline uint16_t __attribute__((always_inline))
 dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
-	     struct vring_desc_1_1 *descs, uint16_t *desc_idx)
+	     struct vring_desc_1_1 *descs, uint16_t *desc_idx,
+	     struct vring_desc_1_1 **dps, uint16_t *_cnt)
 {
 	struct vring_desc_1_1 *desc;
 	uint64_t desc_addr;
@@ -1110,6 +1111,7 @@ dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mbuf *cur = m, *prev = m;
 	struct virtio_net_hdr *hdr = NULL;
 	uint16_t head_idx = *desc_idx;
+	uint16_t cnt = *_cnt;
 
 	desc = &descs[(head_idx++) & (vq->size - 1)];
 	if (unlikely((desc->len < dev->vhost_hlen)) ||
@@ -1132,6 +1134,9 @@ dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 */
 	if (likely((desc->len == dev->vhost_hlen) &&
 		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		if (desc->flags & DESC_WB)
+			dps[cnt++] = desc;
+
 		desc = &descs[(head_idx++) & (vq->size - 1)];
 		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
 			return -1;
@@ -1172,6 +1177,9 @@ dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		if ((desc->flags & VRING_DESC_F_NEXT) == 0)
 			break;
 
+		if (desc->flags & DESC_WB)
+			dps[cnt++] = desc;
+
 		desc = &descs[(head_idx++) & (vq->size - 1)];
 		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
 			return -1;
@@ -1217,7 +1225,11 @@ dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	if (hdr)
 		vhost_dequeue_offload(hdr, m);
 
+	if (desc->flags & DESC_WB)
+		dps[cnt++] = desc;
+
 	*desc_idx = head_idx;
+	*_cnt = cnt;
 
 	return 0;
 }
@@ -1229,7 +1241,8 @@ vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
 {
 	uint16_t i;
 	struct vring_desc_1_1 *desc = vq->desc_1_1;
-	uint16_t head_idx = vq->last_used_idx;
+	struct vring_desc_1_1 *dps[64];
+	uint16_t cnt = 0;
 	uint16_t desc_idx;
 	int err;
 
@@ -1250,26 +1263,22 @@ vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			break;
 		}
 
-		err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc, &desc_idx);
+		err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc,
+				   &desc_idx, dps, &cnt);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			break;
 		}
 	}
 
+	count = i;
 	vq->last_used_idx = desc_idx;
 
-	if (likely(i)) {
-		rte_prefetch0(&desc[head_idx & (vq->size - 1)]);
-		for (desc_idx = head_idx + 1;
-		     desc_idx != vq->last_used_idx;
-		     desc_idx++) {
-			desc[desc_idx & (vq->size - 1)].flags = 0;
-		}
-		rte_smp_wmb();
-		desc[head_idx & (vq->size - 1)].flags = 0;
-	}
-	return i;
+	rte_smp_wmb();
+	for (i = 0; i < cnt; i++)
+		dps[i]->flags = 0;
+
+	return count;
 }
 
 uint16_t
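For reference, the backend-side handling added to virtio_net.c above
boils down to the following sketch: while walking a descriptor chain,
remember every descriptor that carries DESC_WB (the patch collects them
in the dps[] array), and once the payload has been copied into mbufs,
write back only those descriptors. The helper name is made up, the
struct layout beyond addr is assumed, and a GCC/C11 release fence
stands in for DPDK's rte_smp_wmb():

#include <stdint.h>

#define DESC_WB 0x0100

struct vring_desc_1_1 {          /* field layout past 'addr' is assumed */
	uint64_t addr;
	uint32_t len;
	uint16_t index;
	uint16_t flags;
};

/*
 * Clear the flags of the write-back descriptors only, after making the
 * copied-out data visible; every other descriptor in the chain is left
 * untouched, which is what saves the descriptor-ring writes.
 */
void
write_back_marked(struct vring_desc_1_1 **dps, uint16_t cnt)
{
	uint16_t i;

	__atomic_thread_fence(__ATOMIC_RELEASE);  /* rte_smp_wmb() in the patch */

	for (i = 0; i < cnt; i++)
		dps[i]->flags = 0;
}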