[v4,06/14] vhost: flush vhost enqueue shadow ring by batch

Message ID: 20191009133849.69002-7-yong.liu@intel.com
State: Superseded
Delegated to: Maxime Coquelin
Series: vhost packed ring performance optimization

Checks

Context               Check    Description
ci/Intel-compilation  success  Compilation OK
ci/checkpatch         success  coding style OK

Commit Message

Liu, Yong Oct. 9, 2019, 1:38 p.m.
Buffer vhost enqueue shadow ring updates and flush the shadow ring only
once the number of buffered descriptors exceeds one batch. Thus virtio
can receive packets at a faster frequency.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
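
The idea, condensed into a single sketch for readability: buffer each
used-ring update in the shadow ring and only perform the (more expensive)
flush to the guest-visible ring once a whole batch of descriptors has
accumulated. The combined helper name below is hypothetical; the fields,
the PACKED_BATCH_SIZE/PACKED_BATCH_MASK macros and the
do_data_copy_enqueue()/flush_shadow_used_ring_packed() helpers come from
the patch below and from earlier patches in this series.

static inline void
enqueue_shadow_and_maybe_flush(struct virtio_net *dev,
	struct vhost_virtqueue *vq,
	uint16_t desc_idx, uint32_t len, uint16_t count)
{
	/* restart the batch counter from a batch-aligned position
	 * whenever the shadow ring is empty
	 */
	if (!vq->shadow_used_idx)
		vq->enqueue_shadow_count = vq->last_used_idx &
						PACKED_BATCH_MASK;

	uint16_t i = vq->shadow_used_idx++;

	/* buffer the used element instead of writing it out directly */
	vq->shadow_used_packed[i].id = desc_idx;
	vq->shadow_used_packed[i].len = len;
	vq->shadow_used_packed[i].count = count;
	vq->enqueue_shadow_count += count;

	/* touch the guest-visible used ring only once per batch */
	if (vq->enqueue_shadow_count >= PACKED_BATCH_SIZE) {
		do_data_copy_enqueue(dev, vq);
		flush_shadow_used_ring_packed(dev, vq);
	}
}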

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index e50e137ca..18a207fc6 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -163,6 +163,7 @@  struct vhost_virtqueue {
 		struct vring_used_elem_packed *shadow_used_packed;
 	};
 	uint16_t                shadow_used_idx;
+	uint16_t		enqueue_shadow_count;
 	struct vhost_vring_addr ring_addrs;
 
 	struct batch_copy_elem	*batch_copy_elems;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2ff7329b2..f85619dc2 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -169,6 +169,24 @@  update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
 	vq->shadow_used_packed[i].count = count;
 }
 
+static __rte_always_inline void
+update_enqueue_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+	uint16_t desc_idx, uint32_t len, uint16_t count)
+{
+	/* enqueue shadow flush action aligned with batch num */
+	if (!vq->shadow_used_idx)
+		vq->enqueue_shadow_count = vq->last_used_idx &
+						PACKED_BATCH_MASK;
+
+	uint16_t i = vq->shadow_used_idx++;
+
+	vq->shadow_used_packed[i].id  = desc_idx;
+	vq->shadow_used_packed[i].len = len;
+	vq->shadow_used_packed[i].count = count;
+
+	vq->enqueue_shadow_count += count;
+}
+
 static inline void
 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
@@ -198,6 +216,23 @@  do_data_copy_dequeue(struct vhost_virtqueue *vq)
 	vq->batch_copy_nb_elems = 0;
 }
 
+static __rte_always_inline void
+flush_enqueue_packed(struct virtio_net *dev,
+	struct vhost_virtqueue *vq, uint32_t len[], uint16_t id[],
+	uint16_t count[], uint16_t num_buffers)
+{
+	int i;
+	for (i = 0; i < num_buffers; i++) {
+		update_enqueue_shadow_used_ring_packed(vq, id[i], len[i],
+						       count[i]);
+
+		if (vq->enqueue_shadow_count >= PACKED_BATCH_SIZE) {
+			do_data_copy_enqueue(dev, vq);
+			flush_shadow_used_ring_packed(dev, vq);
+		}
+	}
+}
+
 /* avoid write operation when necessary, to lessen cache issues */
 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
 	if ((var) != (val))			\
@@ -786,6 +821,9 @@  vhost_enqueue_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint16_t desc_count;
 	uint32_t size = pkt->pkt_len + dev->vhost_hlen;
 	uint16_t num_buffers = 0;
+	uint32_t buffer_len[vq->size];
+	uint16_t buffer_buf_id[vq->size];
+	uint16_t buffer_desc_count[vq->size];
 
 	if (rxvq_is_mergeable(dev))
 		max_tries = vq->size - 1;
@@ -811,6 +849,9 @@  vhost_enqueue_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		len = RTE_MIN(len, size);
 		size -= len;
 
+		buffer_len[num_buffers] = len;
+		buffer_buf_id[num_buffers] = buf_id;
+		buffer_desc_count[num_buffers] = desc_count;
 		num_buffers += 1;
 
 		*nr_descs += desc_count;
@@ -822,6 +863,8 @@  vhost_enqueue_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0)
 		return -1;
 
+	flush_enqueue_packed(dev, vq, buffer_len, buffer_buf_id,
+			     buffer_desc_count, num_buffers);
 	return 0;
 }