[dpdk-dev] vhost: do deep copy while reallocate vq
Commit Message
When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't perform
a deep copy, which leads to 1) an invalid zmbuf list and 2) remote memory access.
This patch re-initializes the zmbuf list and also does the deep copy.
Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
---
lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
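For context before the review thread: numa_realloc() relocates the dev and vq structures onto the NUMA node of the guest memory they serve, but the memcpy() it uses is a shallow copy. Without this patch, the pointers embedded in the vq (zmbufs, shadow_used_ring, batch_copy_elems) keep referencing allocations on the old node, and the copied zmbuf list head points into the freed old structure. A simplified sketch of that flow (not the actual function; error paths and details omitted):

    #include <string.h>
    #include <rte_malloc.h>
    #include "vhost.h"   /* struct vhost_virtqueue */

    /*
     * Simplified sketch of the vq-relocation step in numa_realloc(),
     * only to show where the shallow copy happens.
     */
    static struct vhost_virtqueue *
    relocate_vq_sketch(struct vhost_virtqueue *old_vq, int newnode)
    {
        struct vhost_virtqueue *vq;

        vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode);
        if (vq == NULL)
            return old_vq;

        /* Shallow copy: embedded pointers still target the old node. */
        memcpy(vq, old_vq, sizeof(*vq));

        /* This patch adds the deep copy / list re-init right here. */

        rte_free(old_vq);
        return vq;
    }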
Comments
Hi Junjie,
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Junjie Chen
> Sent: Monday, January 15, 2018 7:32 PM
> To: yliu@fridaylinux.org; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Chen, Junjie J <junjie.j.chen@intel.com>
> Subject: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
>
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't
> perform a deep copy, which leads to 1) an invalid zmbuf list and 2) remote
> memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
>
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
> lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index f4c7ce4..795462c 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> "zero copy is force disabled\n");
> dev->dequeue_zero_copy = 0;
> }
> + TAILQ_INIT(&vq->zmbuf_list);
> }
>
> vq->shadow_used_ring = rte_malloc(NULL,
> @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> int oldnode, newnode;
> struct virtio_net *old_dev;
> struct vhost_virtqueue *old_vq, *vq;
> + struct zcopy_mbuf *new_zmbuf;
> + struct vring_used_elem *new_shadow_used_ring;
> + struct batch_copy_elem *new_batch_copy_elems;
> int ret;
>
> old_dev = dev;
> @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> return dev;
>
> memcpy(vq, old_vq, sizeof(*vq));
> + TAILQ_INIT(&vq->zmbuf_list);
> +
> + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> + sizeof(struct zcopy_mbuf), 0, newnode);
> + if (new_zmbuf) {
> + rte_free(vq->zmbufs);
> + vq->zmbufs = new_zmbuf;
> + }
You need to consider how to handle the case where rte_malloc_socket() returns NULL.
> + new_shadow_used_ring = rte_malloc_socket(NULL,
> + vq->size * sizeof(struct vring_used_elem),
> + RTE_CACHE_LINE_SIZE,
> + newnode);
> + if (new_shadow_used_ring) {
> + rte_free(vq->shadow_used_ring);
> + vq->shadow_used_ring = new_shadow_used_ring;
> + }
> +
Ditto
> + new_batch_copy_elems = rte_malloc_socket(NULL,
> + vq->size * sizeof(struct batch_copy_elem),
> + RTE_CACHE_LINE_SIZE,
> + newnode);
> + if (new_batch_copy_elems) {
> + rte_free(vq->batch_copy_elems);
> + vq->batch_copy_elems = new_batch_copy_elems;
> + }
Ditto
> +
> rte_free(old_vq);
> }
>
> --
> 2.0.1
Hi
> > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > "zero copy is force disabled\n");
> > dev->dequeue_zero_copy = 0;
> > }
> > + TAILQ_INIT(&vq->zmbuf_list);
> > }
> >
> > vq->shadow_used_ring = rte_malloc(NULL,
> > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > int oldnode, newnode;
> > struct virtio_net *old_dev;
> > struct vhost_virtqueue *old_vq, *vq;
> > + struct zcopy_mbuf *new_zmbuf;
> > + struct vring_used_elem *new_shadow_used_ring;
> > + struct batch_copy_elem *new_batch_copy_elems;
> > int ret;
> >
> > old_dev = dev;
> > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > return dev;
> >
> > memcpy(vq, old_vq, sizeof(*vq));
> > + TAILQ_INIT(&vq->zmbuf_list);
> > +
> > + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > + sizeof(struct zcopy_mbuf), 0, newnode);
> > + if (new_zmbuf) {
> > + rte_free(vq->zmbufs);
> > + vq->zmbufs = new_zmbuf;
> > + }
>
> You need to consider how to handle the case where rte_malloc_socket()
> returns NULL.
If it fails to allocate new_zmbuf, it keeps using the old zmbufs, so as to keep vhost alive.
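To make the fallback concrete, here is a minimal sketch of the pattern the patch applies to all three buffers; the helper name realloc_on_node() is hypothetical, not part of the patch:

    #include <stddef.h>
    #include <rte_malloc.h>

    /*
     * Allocate a replacement buffer on the target NUMA node and free
     * the old one. If the allocation fails, keep the old buffer so the
     * device stays functional, at the cost of remote memory accesses.
     * Contents are deliberately not copied: callers re-initialize what
     * they need (e.g. TAILQ_INIT() for the zmbuf list).
     */
    static void *
    realloc_on_node(void *old, size_t size, unsigned int align, int node)
    {
        void *new_buf = rte_malloc_socket(NULL, size, align, node);

        if (new_buf == NULL)
            return old;    /* fall back to the old (remote) buffer */

        rte_free(old);
        return new_buf;
    }

With this helper, the zmbuf case would read:
    vq->zmbufs = realloc_on_node(vq->zmbufs,
            vq->zmbuf_size * sizeof(struct zcopy_mbuf), 0, newnode);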
>
> > + new_shadow_used_ring = rte_malloc_socket(NULL,
> > + vq->size * sizeof(struct vring_used_elem),
> > + RTE_CACHE_LINE_SIZE,
> > + newnode);
> > + if (new_shadow_used_ring) {
> > + rte_free(vq->shadow_used_ring);
> > + vq->shadow_used_ring = new_shadow_used_ring;
> > + }
> > +
>
> Ditto
>
> > + new_batch_copy_elems = rte_malloc_socket(NULL,
> > + vq->size * sizeof(struct batch_copy_elem),
> > + RTE_CACHE_LINE_SIZE,
> > + newnode);
> > + if (new_batch_copy_elems) {
> > + rte_free(vq->batch_copy_elems);
> > + vq->batch_copy_elems = new_batch_copy_elems;
> > + }
>
> Ditto
>
> > +
> > rte_free(old_vq);
> > }
> >
> > --
> > 2.0.1
> -----Original Message-----
> From: Chen, Junjie J
> Sent: Monday, January 15, 2018 5:15 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; yliu@fridaylinux.org;
> maxime.coquelin@redhat.com
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
>
> Hi
>
> > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > "zero copy is force disabled\n");
> > > dev->dequeue_zero_copy = 0;
> > > }
> > > + TAILQ_INIT(&vq->zmbuf_list);
> > > }
> > >
> > > vq->shadow_used_ring = rte_malloc(NULL,
> > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > int oldnode, newnode;
> > > struct virtio_net *old_dev;
> > > struct vhost_virtqueue *old_vq, *vq;
> > > + struct zcopy_mbuf *new_zmbuf;
> > > + struct vring_used_elem *new_shadow_used_ring;
> > > + struct batch_copy_elem *new_batch_copy_elems;
> > > int ret;
> > >
> > > old_dev = dev;
> > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > return dev;
> > >
> > > memcpy(vq, old_vq, sizeof(*vq));
> > > + TAILQ_INIT(&vq->zmbuf_list);
> > > +
> > > + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > + sizeof(struct zcopy_mbuf), 0, newnode);
> > > + if (new_zmbuf) {
> > > + rte_free(vq->zmbufs);
> > > + vq->zmbufs = new_zmbuf;
> > > + }
> >
> > You need to consider how to handle the case where rte_malloc_socket()
> > returns NULL.
>
> If it fails to allocate new_zmbuf, it keeps using the old zmbufs, so as to
> keep vhost alive.
That sounds reasonable. Another question: for the three blocks of memory being
allocated, if some succeed and others fail, does that mean the code will end up
accessing memory on different sockets? What's the perf impact if that happens?
thanks
Zhiyong
Hi
> > > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > > "zero copy is force disabled\n");
> > > > dev->dequeue_zero_copy = 0;
> > > > }
> > > > + TAILQ_INIT(&vq->zmbuf_list);
> > > > }
> > > >
> > > > vq->shadow_used_ring = rte_malloc(NULL,
> > > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > int oldnode, newnode;
> > > > struct virtio_net *old_dev;
> > > > struct vhost_virtqueue *old_vq, *vq;
> > > > + struct zcopy_mbuf *new_zmbuf;
> > > > + struct vring_used_elem *new_shadow_used_ring;
> > > > + struct batch_copy_elem *new_batch_copy_elems;
> > > > int ret;
> > > >
> > > > old_dev = dev;
> > > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > return dev;
> > > >
> > > > memcpy(vq, old_vq, sizeof(*vq));
> > > > + TAILQ_INIT(&vq->zmbuf_list);
> > > > +
> > > > + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > > + sizeof(struct zcopy_mbuf), 0, newnode);
> > > > + if (new_zmbuf) {
> > > > + rte_free(vq->zmbufs);
> > > > + vq->zmbufs = new_zmbuf;
> > > > + }
> > >
> > > You need to consider how to handle the case where rte_malloc_socket()
> > > returns NULL.
> >
> > If it fails to allocate new_zmbuf, it keeps using the old zmbufs, so as
> > to keep vhost alive.
>
> That sounds reasonable. Another question: for the three blocks of memory
> being allocated, if some succeed and others fail, does that mean the code
> will end up accessing memory on different sockets? What's the perf impact
> if that happens?
The original code doesn't do a deep copy and thus accesses memory on a different socket; this patch mitigates that situation. It still accesses remote memory when one of the above allocations fails.
I saw some performance improvement (24.8 Gbit/s -> 26.1 Gbit/s) on my dev machine when reallocating only the zmbufs, while I didn't see a significant performance difference when reallocating vring_used_elem and batch_copy_elem.
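For anyone who wants to verify where a buffer actually lands after the reallocation, here is a minimal sketch using get_mempolicy() from <numaif.h> (link with -lnuma); this is the same kind of node lookup numa_realloc() performs on addresses:

    #include <numaif.h>

    /*
     * Ask the kernel which NUMA node backs the page containing addr.
     * With MPOL_F_NODE | MPOL_F_ADDR, get_mempolicy() stores the node
     * id in *node instead of a policy. Returns -1 on error (for
     * instance if the page has not been faulted in yet).
     */
    static int
    addr_to_numa_node(void *addr)
    {
        int node;

        if (get_mempolicy(&node, NULL, 0, addr,
                MPOL_F_NODE | MPOL_F_ADDR) < 0)
            return -1;

        return node;
    }

After a successful rte_malloc_socket(), addr_to_numa_node(vq->zmbufs) should report newnode; after a fallback it reports the old node.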
On 01/15/2018 12:32 PM, Junjie Chen wrote:
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't perform
> a deep copy, which leads to 1) an invalid zmbuf list and 2) remote memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
>
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
> lib/librte_vhost/vhost_user.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
> -----Original Message-----
> From: Chen, Junjie J
> Sent: Tuesday, January 16, 2018 3:39 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; yliu@fridaylinux.org;
> maxime.coquelin@redhat.com
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] vhost: do deep copy while reallocate vq
>
> Hi
> > > > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > > > "zero copy is force disabled\n");
> > > > > dev->dequeue_zero_copy = 0;
> > > > > }
> > > > > + TAILQ_INIT(&vq->zmbuf_list);
> > > > > }
> > > > >
> > > > > vq->shadow_used_ring = rte_malloc(NULL,
> > > > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > > int oldnode, newnode;
> > > > > struct virtio_net *old_dev;
> > > > > struct vhost_virtqueue *old_vq, *vq;
> > > > > + struct zcopy_mbuf *new_zmbuf;
> > > > > + struct vring_used_elem *new_shadow_used_ring;
> > > > > + struct batch_copy_elem *new_batch_copy_elems;
> > > > > int ret;
> > > > >
> > > > > old_dev = dev;
> > > > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > > > return dev;
> > > > >
> > > > > memcpy(vq, old_vq, sizeof(*vq));
> > > > > + TAILQ_INIT(&vq->zmbuf_list);
> > > > > +
> > > > > + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > > > + sizeof(struct zcopy_mbuf), 0, newnode);
> > > > > + if (new_zmbuf) {
> > > > > + rte_free(vq->zmbufs);
> > > > > + vq->zmbufs = new_zmbuf;
> > > > > + }
> > > >
> > > > You need to consider how to handle the case where rte_malloc_socket()
> > > > returns NULL.
> > >
> > > If it fails to allocate new_zmbuf, it keeps using the old zmbufs, so
> > > as to keep vhost alive.
> >
> > That sounds reasonable. Another question: for the three blocks of memory
> > being allocated, if some succeed and others fail, does that mean the code
> > will end up accessing memory on different sockets? What's the perf impact
> > if that happens?
>
> The original code doesn't do a deep copy and thus accesses memory on a
> different socket; this patch mitigates that situation. It still accesses
> remote memory when one of the above allocations fails.
>
> I saw some performance improvement (24.8 Gbit/s -> 26.1 Gbit/s) on my dev
> machine when reallocating only the zmbufs, while I didn't see a significant
> performance difference when reallocating vring_used_elem and
> batch_copy_elem.
Great,
Reviewed-by: Zhiyong Yang <zhiyong.yang@intel.com>
Thanks
Zhiyong
On Mon, Jan 15, 2018 at 06:32:19AM -0500, Junjie Chen wrote:
> When vhost reallocates dev and vq for the NUMA-enabled case, it doesn't perform
> a deep copy, which leads to 1) an invalid zmbuf list and 2) remote memory access.
> This patch re-initializes the zmbuf list and also does the deep copy.
>
> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
Applied to dpdk-next-virtio.
Thanks.
--yliu
@@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
                 "zero copy is force disabled\n");
             dev->dequeue_zero_copy = 0;
         }
+        TAILQ_INIT(&vq->zmbuf_list);
     }

     vq->shadow_used_ring = rte_malloc(NULL,
@@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
     int oldnode, newnode;
     struct virtio_net *old_dev;
     struct vhost_virtqueue *old_vq, *vq;
+    struct zcopy_mbuf *new_zmbuf;
+    struct vring_used_elem *new_shadow_used_ring;
+    struct batch_copy_elem *new_batch_copy_elems;
     int ret;

     old_dev = dev;
@@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
         return dev;

     memcpy(vq, old_vq, sizeof(*vq));
+    TAILQ_INIT(&vq->zmbuf_list);
+
+    new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
+            sizeof(struct zcopy_mbuf), 0, newnode);
+    if (new_zmbuf) {
+        rte_free(vq->zmbufs);
+        vq->zmbufs = new_zmbuf;
+    }
+
+    new_shadow_used_ring = rte_malloc_socket(NULL,
+            vq->size * sizeof(struct vring_used_elem),
+            RTE_CACHE_LINE_SIZE,
+            newnode);
+    if (new_shadow_used_ring) {
+        rte_free(vq->shadow_used_ring);
+        vq->shadow_used_ring = new_shadow_used_ring;
+    }
+
+    new_batch_copy_elems = rte_malloc_socket(NULL,
+            vq->size * sizeof(struct batch_copy_elem),
+            RTE_CACHE_LINE_SIZE,
+            newnode);
+    if (new_batch_copy_elems) {
+        rte_free(vq->batch_copy_elems);
+        vq->batch_copy_elems = new_batch_copy_elems;
+    }
+
     rte_free(old_vq);
 }
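A closing note on why the TAILQ_INIT() after memcpy() is needed: a TAILQ head stores a pointer back into itself (while the list is empty, tqh_last points at the head's own tqh_first field), so a byte-wise copy of the vq leaves the copied head pointing into the old structure. A self-contained sketch of that failure mode, using only <sys/queue.h>:

    #include <stdio.h>
    #include <string.h>
    #include <sys/queue.h>

    struct entry {
        TAILQ_ENTRY(entry) next;
    };
    TAILQ_HEAD(entry_list, entry);

    int
    main(void)
    {
        struct entry_list old_head, new_head;

        TAILQ_INIT(&old_head);  /* old_head.tqh_last = &old_head.tqh_first */

        memcpy(&new_head, &old_head, sizeof(new_head));
        /* The copied head still points into old_head: it is stale. */
        printf("stale: %d\n", new_head.tqh_last == &old_head.tqh_first);

        /* What the patch does right after memcpy(vq, old_vq, ...). */
        TAILQ_INIT(&new_head);
        printf("fixed: %d\n", new_head.tqh_last == &new_head.tqh_first);

        return 0;
    }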