[dpdk-dev,v3,16/25] virtio: Free mbufs with threshold

Message ID 1422516249-14596-17-git-send-email-changchun.ouyang@intel.com
State Superseded, archived

Commit Message

Ouyang Changchun Jan. 29, 2015, 7:24 a.m. UTC
This makes the virtio driver work like ixgbe: transmitted mbufs are
held until a transmit free threshold is reached and are then freed in
bulk. The previous behavior was to hold each mbuf until its ring entry
was reused, which kept more memory pinned than necessary.
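
The mechanism in miniature (a hedged sketch with simplified, made-up
types, not the driver code; the real implementation is in the diff
below):

	/* Illustration only: stand-ins for the driver's structures. */
	struct pkt;
	struct txq {
		uint16_t used;           /* completions not yet reclaimed */
		uint16_t free_thresh;    /* reclaim once this many accumulate */
		struct pkt *cookie[256]; /* mbuf held per ring slot */
	};

	extern void free_pkt(struct pkt *p); /* think rte_pktmbuf_free() */

	/* Old: each mbuf was freed only when its slot was reused, so up
	 * to a ring's worth of mbufs stayed pinned. New: once more than
	 * free_thresh completions are pending, free them all at once. */
	static void xmit_cleanup(struct txq *q)
	{
		if (q->used <= q->free_thresh)
			return;
		for (uint16_t i = 0; i < q->used; i++) {
			free_pkt(q->cookie[i]);
			q->cookie[i] = NULL;
		}
		q->used = 0;
	}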

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_virtio/virtio_ethdev.c |  7 ++--
 lib/librte_pmd_virtio/virtio_rxtx.c   | 75 +++++++++++++++++++++++++----------
 lib/librte_pmd_virtio/virtqueue.h     |  3 +-
 3 files changed, 60 insertions(+), 25 deletions(-)
  

Patch

diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index b30ab2a..8cd2d51 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -176,15 +176,16 @@  virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
 
 	virtqueue_notify(vq);
 
-	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
+	rte_rmb();
+	while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+		rte_rmb();
 		usleep(100);
+	}
 
 	while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
 		uint32_t idx, desc_idx, used_idx;
 		struct vring_used_elem *uep;
 
-		virtio_rmb();
-
 		used_idx = (uint32_t)(vq->vq_used_cons_idx
 				& (vq->vq_nentries - 1));
 		uep = &vq->vq_ring.used->ring[used_idx];
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index b6d6832..580701a 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -129,17 +129,32 @@  virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
 	return i;
 }
 
+#ifndef DEFAULT_TX_FREE_THRESH
+#define DEFAULT_TX_FREE_THRESH 32
+#endif
+
+/* Cleanup from completed transmits. */
 static void
-virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
+virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
 {
-	struct vring_used_elem *uep;
-	uint16_t used_idx, desc_idx;
+	uint16_t i, used_idx, desc_idx;
+	for (i = 0; i < num; i++) {
+		struct vring_used_elem *uep;
+		struct vq_desc_extra *dxp;
+
+		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+		uep = &vq->vq_ring.used->ring[used_idx];
+		dxp = &vq->vq_descx[used_idx];
+
+		desc_idx = (uint16_t) uep->id;
+		vq->vq_used_cons_idx++;
+		vq_ring_free_chain(vq, desc_idx);
 
-	used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
-	uep = &vq->vq_ring.used->ring[used_idx];
-	desc_idx = (uint16_t) uep->id;
-	vq->vq_used_cons_idx++;
-	vq_ring_free_chain(vq, desc_idx);
+		if (dxp->cookie != NULL) {
+			rte_pktmbuf_free(dxp->cookie);
+			dxp->cookie = NULL;
+		}
+	}
 }
 
 
@@ -203,8 +218,6 @@  virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
 
 	idx = head_idx;
 	dxp = &txvq->vq_descx[idx];
-	if (dxp->cookie != NULL)
-		rte_pktmbuf_free(dxp->cookie);
 	dxp->cookie = (void *)cookie;
 	dxp->ndescs = needed;
 
@@ -404,6 +417,7 @@  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 {
 	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
 	struct virtqueue *vq;
+	uint16_t tx_free_thresh;
 	int ret;
 
 	PMD_INIT_FUNC_TRACE();
@@ -421,6 +435,22 @@  virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		return ret;
 	}
 
+	tx_free_thresh = tx_conf->tx_free_thresh;
+	if (tx_free_thresh == 0)
+		tx_free_thresh =
+			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
+
+	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
+		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
+			"number of TX entries minus 3 (%u)."
+			" (tx_free_thresh=%u port=%u queue=%u)\n",
+			vq->vq_nentries - 3,
+			tx_free_thresh, dev->data->port_id, queue_idx);
+		return -EINVAL;
+	}
+
+	vq->vq_free_thresh = tx_free_thresh;
+
 	dev->data->tx_queues[queue_idx] = vq;
 	return 0;
 }
@@ -688,11 +718,9 @@  virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct virtqueue *txvq = tx_queue;
 	struct rte_mbuf *txm;
-	uint16_t nb_used, nb_tx, num;
+	uint16_t nb_used, nb_tx;
 	int error;
 
-	nb_tx = 0;
-
 	if (unlikely(nb_pkts < 1))
 		return nb_pkts;
 
@@ -700,21 +728,26 @@  virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	nb_used = VIRTQUEUE_NUSED(txvq);
 
 	virtio_rmb();
+	if (likely(nb_used > txvq->vq_free_thresh))
+		virtio_xmit_cleanup(txvq, nb_used);
 
-	num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
+	nb_tx = 0;
 
 	while (nb_tx < nb_pkts) {
 		/* Need one more descriptor for virtio header. */
 		int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
-		int deq_cnt = RTE_MIN(need, (int)num);
 
-		num -= (deq_cnt > 0) ? deq_cnt : 0;
-		while (deq_cnt > 0) {
-			virtqueue_dequeue_pkt_tx(txvq);
-			deq_cnt--;
+		/* Positive value indicates it needs free vring descriptors */
+		if (unlikely(need > 0)) {
+			nb_used = VIRTQUEUE_NUSED(txvq);
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_used);
+
+			virtio_xmit_cleanup(txvq, need);
+			need = (int)tx_pkts[nb_tx]->nb_segs -
+				txvq->vq_free_cnt + 1;
 		}
 
-		need = (int)tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
 		/*
 		 * Zero or negative value indicates it has enough free
 		 * descriptors to use for transmitting.
@@ -723,7 +756,7 @@  virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			txm = tx_pkts[nb_tx];
 
 			/* Do VLAN tag insertion */
-			if (txm->ol_flags & PKT_TX_VLAN_PKT) {
+			if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
 				error = rte_vlan_insert(&txm);
 				if (unlikely(error)) {
 					rte_pktmbuf_free(txm);
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
index d210f4f..6c45c27 100644
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -164,6 +164,7 @@  struct virtqueue {
 	struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
 	uint16_t    queue_id;             /**< DPDK queue index. */
 	uint8_t     port_id;              /**< Device port identifier. */
+	uint16_t    vq_queue_index;       /**< PCI queue index */
 
 	void        *vq_ring_virt_mem;    /**< linear address of vring*/
 	unsigned int vq_ring_size;
@@ -172,7 +173,7 @@  struct virtqueue {
 	struct vring vq_ring;    /**< vring keeping desc, used and avail */
 	uint16_t    vq_free_cnt; /**< num of desc available */
 	uint16_t    vq_nentries; /**< vring desc numbers */
-	uint16_t    vq_queue_index;       /**< PCI queue index */
+	uint16_t    vq_free_thresh; /**< free threshold */
 	/**
 	 * Head of the free chain in the descriptor table. If
 	 * there are no free descriptors, this will be set to
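
For completeness, applications select the threshold through the generic
ethdev transmit queue configuration; with this patch, a value of 0 picks
the driver default of RTE_MIN(vq_nentries / 4, DEFAULT_TX_FREE_THRESH).
A minimal usage sketch against the DPDK API of this era (port, queue and
descriptor counts are made-up example values):

	#include <rte_ethdev.h>

	static int
	setup_tx_queue(uint8_t port_id)
	{
		/* Ask the PMD to reclaim transmitted mbufs once 32
		 * completions accumulate. Must stay below the ring
		 * size minus 3, per the check added in this patch. */
		struct rte_eth_txconf txconf = {
			.tx_free_thresh = 32,
		};

		/* queue 0 with 256 descriptors on the port's NUMA node */
		return rte_eth_tx_queue_setup(port_id, 0, 256,
				rte_eth_dev_socket_id(port_id), &txconf);
	}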