[dpdk-dev] vhost: do deep copy while reallocate vq

Message ID 1516015939-11266-1-git-send-email-junjie.j.chen@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Yuanhan Liu
Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

junjie.j.chen@intel.com Jan. 15, 2018, 11:32 a.m. UTC
When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't perform
a deep copy, which leads to 1) an invalid zmbuf list and 2) remote memory access.
This patch re-initializes the zmbuf list and also does the deep copy.

Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
---
 lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
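
A minimal, self-contained sketch (using stand-in structures, not the real
vhost definitions) of why a shallow memcpy() leaves the copied vq with a
dangling zmbuf list head, and hence why the patch re-initializes it:

#include <stdio.h>
#include <string.h>
#include <sys/queue.h>

/* Stand-ins for the structures in lib/librte_vhost/vhost.h. */
struct zcopy_mbuf {
	TAILQ_ENTRY(zcopy_mbuf) next;
};
TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);

struct fake_vq {
	struct zcopy_mbuf_list zmbuf_list;
};

int main(void)
{
	struct fake_vq old_vq, new_vq;

	TAILQ_INIT(&old_vq.zmbuf_list);

	/* Shallow copy, as numa_realloc() does. For an empty list the
	 * copied head's tqh_last still points into old_vq, which
	 * numa_realloc() frees right afterwards. */
	memcpy(&new_vq, &old_vq, sizeof(new_vq));
	printf("tqh_last points into old_vq: %d\n",
	       new_vq.zmbuf_list.tqh_last == &old_vq.zmbuf_list.tqh_first);

	/* The fix: give the new vq its own, valid list head. */
	TAILQ_INIT(&new_vq.zmbuf_list);
	return 0;
}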
  

Comments

Yang, Zhiyong Jan. 15, 2018, 9:05 a.m. UTC | #1
Hi Junjie,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Junjie Chen
> Sent: Monday, January 15, 2018 7:32 PM
> To: yliu@fridaylinux.org; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Chen, Junjie J <junjie.j.chen@intel.com>
> Subject: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
> 
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't
> perform a deep copy, which leads to 1) an invalid zmbuf list and 2) remote
> memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
> 
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
>  lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index f4c7ce4..795462c 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
>  				"zero copy is force disabled\n");
>  			dev->dequeue_zero_copy = 0;
>  		}
> +		TAILQ_INIT(&vq->zmbuf_list);
>  	}
> 
>  	vq->shadow_used_ring = rte_malloc(NULL,
> @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
>  	int oldnode, newnode;
>  	struct virtio_net *old_dev;
>  	struct vhost_virtqueue *old_vq, *vq;
> +	struct zcopy_mbuf *new_zmbuf;
> +	struct vring_used_elem *new_shadow_used_ring;
> +	struct batch_copy_elem *new_batch_copy_elems;
>  	int ret;
> 
>  	old_dev = dev;
> @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
>  			return dev;
> 
>  		memcpy(vq, old_vq, sizeof(*vq));
> +		TAILQ_INIT(&vq->zmbuf_list);
> +
> +		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> +			sizeof(struct zcopy_mbuf), 0, newnode);
> +		if (new_zmbuf) {
> +			rte_free(vq->zmbufs);
> +			vq->zmbufs = new_zmbuf;
> +		}

You need to consider how to handle the case where rte_malloc_socket() returns NULL.
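
For comparison, a sketch of the stricter handling this comment points at:
failing the migration instead of proceeding with mixed sockets. This is
hypothetical and assumes old_vq is still reachable from dev at this point in
numa_realloc(); the applied patch deliberately chooses the fallback instead:

		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
			sizeof(struct zcopy_mbuf), 0, newnode);
		if (new_zmbuf == NULL) {
			/* vq->zmbufs still points at the old array, so
			 * freeing only the new vq shell doesn't leak. */
			rte_free(vq);
			return dev;	/* stay on the old NUMA node */
		}
		rte_free(vq->zmbufs);
		vq->zmbufs = new_zmbuf;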

> +		new_shadow_used_ring = rte_malloc_socket(NULL,
> +			vq->size * sizeof(struct vring_used_elem),
> +			RTE_CACHE_LINE_SIZE,
> +			newnode);
> +		if (new_shadow_used_ring) {
> +			rte_free(vq->shadow_used_ring);
> +			vq->shadow_used_ring = new_shadow_used_ring;
> +		}
> +

Ditto

> +		new_batch_copy_elems = rte_malloc_socket(NULL,
> +			vq->size * sizeof(struct batch_copy_elem),
> +			RTE_CACHE_LINE_SIZE,
> +			newnode);
> +		if (new_batch_copy_elems) {
> +			rte_free(vq->batch_copy_elems);
> +			vq->batch_copy_elems = new_batch_copy_elems;
> +		}

Ditto

> +
>  		rte_free(old_vq);
>  	}
> 
> --
> 2.0.1
  
junjie.j.chen@intel.com Jan. 15, 2018, 9:14 a.m. UTC | #2
Hi

> > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> >  				"zero copy is force disabled\n");
> >  			dev->dequeue_zero_copy = 0;
> >  		}
> > +		TAILQ_INIT(&vq->zmbuf_list);
> >  	}
> >
> >  	vq->shadow_used_ring = rte_malloc(NULL,
> > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> >  	int oldnode, newnode;
> >  	struct virtio_net *old_dev;
> >  	struct vhost_virtqueue *old_vq, *vq;
> > +	struct zcopy_mbuf *new_zmbuf;
> > +	struct vring_used_elem *new_shadow_used_ring;
> > +	struct batch_copy_elem *new_batch_copy_elems;
> >  	int ret;
> >
> >  	old_dev = dev;
> > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> >  			return dev;
> >
> >  		memcpy(vq, old_vq, sizeof(*vq));
> > +		TAILQ_INIT(&vq->zmbuf_list);
> > +
> > +		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > +			sizeof(struct zcopy_mbuf), 0, newnode);
> > +		if (new_zmbuf) {
> > +			rte_free(vq->zmbufs);
> > +			vq->zmbufs = new_zmbuf;
> > +		}
> 
> You need to consider how to handle the case where rte_malloc_socket()
> returns NULL.

If allocating new_zmbuf fails, it keeps using the old zmbufs, so as to keep vhost alive.

> 
> > +		new_shadow_used_ring = rte_malloc_socket(NULL,
> > +			vq->size * sizeof(struct vring_used_elem),
> > +			RTE_CACHE_LINE_SIZE,
> > +			newnode);
> > +		if (new_shadow_used_ring) {
> > +			rte_free(vq->shadow_used_ring);
> > +			vq->shadow_used_ring = new_shadow_used_ring;
> > +		}
> > +
> 
> Ditto
> 
> > +		new_batch_copy_elems = rte_malloc_socket(NULL,
> > +			vq->size * sizeof(struct batch_copy_elem),
> > +			RTE_CACHE_LINE_SIZE,
> > +			newnode);
> > +		if (new_batch_copy_elems) {
> > +			rte_free(vq->batch_copy_elems);
> > +			vq->batch_copy_elems = new_batch_copy_elems;
> > +		}
> 
> Ditto
> 
> > +
> >  		rte_free(old_vq);
> >  	}
> >
> > --
> > 2.0.1
  
Yang, Zhiyong Jan. 16, 2018, 12:57 a.m. UTC | #3
> -----Original Message-----
> From: Chen, Junjie J
> Sent: Monday, January 15, 2018 5:15 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; yliu@fridaylinux.org;
> maxime.coquelin@redhat.com
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
> 
> Hi
> 
> > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > >  				"zero copy is force disabled\n");
> > >  			dev->dequeue_zero_copy = 0;
> > >  		}
> > > +		TAILQ_INIT(&vq->zmbuf_list);
> > >  	}
> > >
> > >  	vq->shadow_used_ring = rte_malloc(NULL,
> > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > >  	int oldnode, newnode;
> > >  	struct virtio_net *old_dev;
> > >  	struct vhost_virtqueue *old_vq, *vq;
> > > +	struct zcopy_mbuf *new_zmbuf;
> > > +	struct vring_used_elem *new_shadow_used_ring;
> > > +	struct batch_copy_elem *new_batch_copy_elems;
> > >  	int ret;
> > >
> > >  	old_dev = dev;
> > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > >  			return dev;
> > >
> > >  		memcpy(vq, old_vq, sizeof(*vq));
> > > +		TAILQ_INIT(&vq->zmbuf_list);
> > > +
> > > +		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > +			sizeof(struct zcopy_mbuf), 0, newnode);
> > > +		if (new_zmbuf) {
> > > +			rte_free(vq->zmbufs);
> > > +			vq->zmbufs = new_zmbuf;
> > > +		}
> >
> > You need to consider how to handle the case where rte_malloc_socket()
> > returns NULL.
> 
> If allocating new_zmbuf fails, it keeps using the old zmbufs, so as to keep
> vhost alive.

That sounds reasonable. Another question: for the three blocks of memory being
allocated, if some succeed and others fail, does that mean the code will access
memory on different sockets? What's the performance impact if that happens?

thanks
Zhiyong
  
junjie.j.chen@intel.com Jan. 16, 2018, 7:38 a.m. UTC | #4
Hi
> > > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > >  				"zero copy is force disabled\n");
> > > >  			dev->dequeue_zero_copy = 0;
> > > >  		}
> > > > +		TAILQ_INIT(&vq->zmbuf_list);
> > > >  	}
> > > >
> > > >  	vq->shadow_used_ring = rte_malloc(NULL,
> > > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > >  	int oldnode, newnode;
> > > >  	struct virtio_net *old_dev;
> > > >  	struct vhost_virtqueue *old_vq, *vq;
> > > > +	struct zcopy_mbuf *new_zmbuf;
> > > > +	struct vring_used_elem *new_shadow_used_ring;
> > > > +	struct batch_copy_elem *new_batch_copy_elems;
> > > >  	int ret;
> > > >
> > > >  	old_dev = dev;
> > > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > >  			return dev;
> > > >
> > > >  		memcpy(vq, old_vq, sizeof(*vq));
> > > > +		TAILQ_INIT(&vq->zmbuf_list);
> > > > +
> > > > +		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > > +			sizeof(struct zcopy_mbuf), 0, newnode);
> > > > +		if (new_zmbuf) {
> > > > +			rte_free(vq->zmbufs);
> > > > +			vq->zmbufs = new_zmbuf;
> > > > +		}
> > >
> > > You need to consider how to handle the case where rte_malloc_socket()
> > > returns NULL.
> >
> > If allocating new_zmbuf fails, it keeps using the old zmbufs, so as to
> > keep vhost alive.
> 
> That sounds reasonable. Another question: for the three blocks of memory
> being allocated, if some succeed and others fail, does that mean the code
> will access memory on different sockets? What's the performance impact if
> that happens?

The original code doesn't do a deep copy and thus accesses memory on a
different socket; this patch mitigates that. It only accesses remote memory
when one of the above allocations fails.

I saw some performance improvement (24.8 Gbits/s -> 26.1 Gbits/s) on my dev
machine when reallocating only the zmbufs, while I didn't see a significant
performance difference when reallocating vring_used_elem and batch_copy_elem.
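
For anyone who wants to verify where each of the three buffers actually lands
after migration, the same get_mempolicy() query that numa_realloc() uses to
resolve an address to a node can be wrapped into a small helper (a sketch;
Linux only, link with -lnuma), e.g. comparing addr_to_node(vq->zmbufs) with
addr_to_node(vq->batch_copy_elems):

#include <numaif.h>

/* Return the NUMA node backing an address, or -1 on failure;
 * the same MPOL_F_NODE | MPOL_F_ADDR query numa_realloc() uses. */
static int
addr_to_node(const void *addr)
{
	int node = -1;

	if (get_mempolicy(&node, NULL, 0, (void *)addr,
			MPOL_F_NODE | MPOL_F_ADDR) < 0)
		return -1;
	return node;
}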
  
Maxime Coquelin Jan. 16, 2018, 8:54 a.m. UTC | #5
On 01/15/2018 12:32 PM, Junjie Chen wrote:
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't
> perform a deep copy, which leads to 1) an invalid zmbuf list and 2) remote
> memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
> 
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
>   lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
>   1 file changed, 31 insertions(+)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  
Yang, Zhiyong Jan. 17, 2018, 1:36 a.m. UTC | #6
> -----Original Message-----
> From: Chen, Junjie J
> Sent: Tuesday, January 16, 2018 3:39 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; yliu@fridaylinux.org;
> maxime.coquelin@redhat.com
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
> 
> Hi
> > > > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > > >  				"zero copy is force disabled\n");
> > > > >  			dev->dequeue_zero_copy = 0;
> > > > >  		}
> > > > > +		TAILQ_INIT(&vq->zmbuf_list);
> > > > >  	}
> > > > >
> > > > >  	vq->shadow_used_ring = rte_malloc(NULL,
> > > > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > >  	int oldnode, newnode;
> > > > >  	struct virtio_net *old_dev;
> > > > >  	struct vhost_virtqueue *old_vq, *vq;
> > > > > +	struct zcopy_mbuf *new_zmbuf;
> > > > > +	struct vring_used_elem *new_shadow_used_ring;
> > > > > +	struct batch_copy_elem *new_batch_copy_elems;
> > > > >  	int ret;
> > > > >
> > > > >  	old_dev = dev;
> > > > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > >  			return dev;
> > > > >
> > > > >  		memcpy(vq, old_vq, sizeof(*vq));
> > > > > +		TAILQ_INIT(&vq->zmbuf_list);
> > > > > +
> > > > > +		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > > > +			sizeof(struct zcopy_mbuf), 0, newnode);
> > > > > +		if (new_zmbuf) {
> > > > > +			rte_free(vq->zmbufs);
> > > > > +			vq->zmbufs = new_zmbuf;
> > > > > +		}
> > > >
> > > > You need to consider how to handle the case where rte_malloc_socket()
> > > > returns NULL.
> > >
> > > If allocating new_zmbuf fails, it keeps using the old zmbufs, so as to
> > > keep vhost alive.
> >
> > That sounds reasonable. Another question: for the three blocks of memory
> > being allocated, if some succeed and others fail, does that mean the code
> > will access memory on different sockets? What's the performance impact if
> > that happens?
> 
> The original code doesn't do a deep copy and thus accesses memory on a
> different socket; this patch mitigates that. It only accesses remote memory
> when one of the above allocations fails.
> 
> I saw some performance improvement (24.8 Gbits/s -> 26.1 Gbits/s) on my dev
> machine when reallocating only the zmbufs, while I didn't see a significant
> performance difference when reallocating vring_used_elem and
> batch_copy_elem.

Great, 

Reviewed-by: Zhiyong Yang <zhiyong.yang@intel.com> 

Thanks
Zhiyong
  
Yuanhan Liu Jan. 17, 2018, 2:46 p.m. UTC | #7
On Mon, Jan 15, 2018 at 06:32:19AM -0500, Junjie Chen wrote:
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't
> perform a deep copy, which leads to 1) an invalid zmbuf list and 2) remote
> memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
> 
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---

Applied to dpdk-next-virtio.

Thanks.

	--yliu
  

Patch

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index f4c7ce4..795462c 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -227,6 +227,7 @@  vhost_user_set_vring_num(struct virtio_net *dev,
 				"zero copy is force disabled\n");
 			dev->dequeue_zero_copy = 0;
 		}
+		TAILQ_INIT(&vq->zmbuf_list);
 	}
 
 	vq->shadow_used_ring = rte_malloc(NULL,
@@ -261,6 +262,9 @@  numa_realloc(struct virtio_net *dev, int index)
 	int oldnode, newnode;
 	struct virtio_net *old_dev;
 	struct vhost_virtqueue *old_vq, *vq;
+	struct zcopy_mbuf *new_zmbuf;
+	struct vring_used_elem *new_shadow_used_ring;
+	struct batch_copy_elem *new_batch_copy_elems;
 	int ret;
 
 	old_dev = dev;
@@ -285,6 +289,33 @@  numa_realloc(struct virtio_net *dev, int index)
 			return dev;
 
 		memcpy(vq, old_vq, sizeof(*vq));
+		TAILQ_INIT(&vq->zmbuf_list);
+
+		new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
+			sizeof(struct zcopy_mbuf), 0, newnode);
+		if (new_zmbuf) {
+			rte_free(vq->zmbufs);
+			vq->zmbufs = new_zmbuf;
+		}
+
+		new_shadow_used_ring = rte_malloc_socket(NULL,
+			vq->size * sizeof(struct vring_used_elem),
+			RTE_CACHE_LINE_SIZE,
+			newnode);
+		if (new_shadow_used_ring) {
+			rte_free(vq->shadow_used_ring);
+			vq->shadow_used_ring = new_shadow_used_ring;
+		}
+
+		new_batch_copy_elems = rte_malloc_socket(NULL,
+			vq->size * sizeof(struct batch_copy_elem),
+			RTE_CACHE_LINE_SIZE,
+			newnode);
+		if (new_batch_copy_elems) {
+			rte_free(vq->batch_copy_elems);
+			vq->batch_copy_elems = new_batch_copy_elems;
+		}
+
 		rte_free(old_vq);
 	}