vhost: cache guest/vhost physical address mapping

Message ID 20200316153353.112897-1-yong.liu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: cache guest/vhost physical address mapping |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/travis-robot warning Travis build: failed
ci/Intel-compilation success Compilation OK

Commit Message

Marvin Liu March 16, 2020, 3:33 p.m. UTC
  If Tx zero copy enabled, gpa to hpa mapping table is updated one by
one. This will harm performance when guest memory backend using 2M
hugepages. Now add cached mapping table which will sorted by using
sequence. Address translation will first check cached mapping table,
now performance is back.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
  

Comments

Xiaolong Ye March 16, 2020, 1:48 p.m. UTC | #1
Hi, Marvin

On 03/16, Marvin Liu wrote:
>If Tx zero copy enabled, gpa to hpa mapping table is updated one by
>one. This will harm performance when guest memory backend using 2M
>hugepages. Now add cached mapping table which will sorted by using
>sequence. Address translation will first check cached mapping table,
>now performance is back.
>
>Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
>diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>index 2087d1400..de2c09e7e 100644
>--- a/lib/librte_vhost/vhost.h
>+++ b/lib/librte_vhost/vhost.h
>@@ -368,7 +368,9 @@ struct virtio_net {
> 	struct vhost_device_ops const *notify_ops;
> 
> 	uint32_t		nr_guest_pages;
>+	uint32_t		nr_cached;

What about naming it nr_cached_guest_pages to make it more self-explanatory
as nr_cached is too generic?

> 	uint32_t		max_guest_pages;
>+	struct guest_page       *cached_guest_pages;
> 	struct guest_page       *guest_pages;
> 
> 	int			slave_req_fd;
>@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> 	uint32_t i;
> 	struct guest_page *page;
> 
>+	for (i = 0; i < dev->nr_cached; i++) {
>+		page = &dev->cached_guest_pages[i];
>+		if (gpa >= page->guest_phys_addr &&
>+			gpa + size < page->guest_phys_addr + page->size) {
>+			return gpa - page->guest_phys_addr +
>+				page->host_phys_addr;
>+		}
>+	}
>+
> 	for (i = 0; i < dev->nr_guest_pages; i++) {
> 		page = &dev->guest_pages[i];
> 
> 		if (gpa >= page->guest_phys_addr &&
> 		    gpa + size < page->guest_phys_addr + page->size) {
>+			rte_memcpy(&dev->cached_guest_pages[dev->nr_cached],
>+				   page, sizeof(struct guest_page));
>+			dev->nr_cached++;
> 			return gpa - page->guest_phys_addr +
> 			       page->host_phys_addr;
> 		}
>diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>index bd1be0104..573e99066 100644
>--- a/lib/librte_vhost/vhost_user.c
>+++ b/lib/librte_vhost/vhost_user.c
>@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> 	}
> 
> 	free(dev->guest_pages);
>+	free(dev->cached_guest_pages);
> 	dev->guest_pages = NULL;
>+	dev->cached_guest_pages = NULL;
> 
> 	if (dev->log_addr) {
> 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
>@@ -905,7 +907,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> 		old_pages = dev->guest_pages;
> 		dev->guest_pages = realloc(dev->guest_pages,
> 					dev->max_guest_pages * sizeof(*page));
>-		if (!dev->guest_pages) {
>+		dev->cached_guest_pages = realloc(dev->cached_guest_pages,
>+					dev->max_guest_pages * sizeof(*page));
>+		dev->nr_cached = 0;
>+		if (!dev->guest_pages || !dev->cached_guest_pages) {

Better to compare pointer to NULL according to DPDK's coding style.

> 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> 			free(old_pages);
> 			return -1;
>@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> 		}
> 	}
> 

Do we need initialize dev->nr_cached to 0 explicitly here?

>+	if (!dev->cached_guest_pages) {
>+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
>+						sizeof(struct guest_page));

I'm wondering why use malloc/realloc/free for cached_guest_pages instead of DPDK
memory allocator APIs, I can see current code uses malloc/realloc/free for guest_pages,
Is there any history reason I don't know?

Thanks,
Xiaolong

>+		if (dev->cached_guest_pages == NULL) {
>+			VHOST_LOG_CONFIG(ERR,
>+				"(%d) failed to allocate memory "
>+				"for dev->cached_guest_pages\n",
>+				dev->vid);
>+			return RTE_VHOST_MSG_RESULT_ERR;
>+		}
>+	}
>+
> 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
> 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> 	if (dev->mem == NULL) {
>-- 
>2.17.1
>
  
Marvin Liu March 17, 2020, 1:01 a.m. UTC | #2
Thanks, xiaolong. 

> -----Original Message-----
> From: Ye, Xiaolong <xiaolong.ye@intel.com>
> Sent: Monday, March 16, 2020 9:48 PM
> To: Liu, Yong <yong.liu@intel.com>
> Cc: maxime.coquelin@redhat.com; Wang, Zhihong
> <zhihong.wang@intel.com>; dev@dpdk.org
> Subject: Re: [PATCH] vhost: cache guest/vhost physical address mapping
> 
> Hi, Marvin
> 
> On 03/16, Marvin Liu wrote:
> >If Tx zero copy enabled, gpa to hpa mapping table is updated one by
> >one. This will harm performance when guest memory backend using 2M
> >hugepages. Now add cached mapping table which will sorted by using
> >sequence. Address translation will first check cached mapping table,
> >now performance is back.
> >
> >Signed-off-by: Marvin Liu <yong.liu@intel.com>
> >
> >diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >index 2087d1400..de2c09e7e 100644
> >--- a/lib/librte_vhost/vhost.h
> >+++ b/lib/librte_vhost/vhost.h
> >@@ -368,7 +368,9 @@ struct virtio_net {
> > 	struct vhost_device_ops const *notify_ops;
> >
> > 	uint32_t		nr_guest_pages;
> >+	uint32_t		nr_cached;
> 
> What about naming it nr_cached_guest_pages to make it more self-
> explanatory
> as nr_cached is too generic?

Agreed, naming is too generic. Will be changed in next version.

> 
> > 	uint32_t		max_guest_pages;
> >+	struct guest_page       *cached_guest_pages;
> > 	struct guest_page       *guest_pages;
> >
> > 	int			slave_req_fd;
> >@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa,
> uint64_t size)
> > 	uint32_t i;
> > 	struct guest_page *page;
> >
> >+	for (i = 0; i < dev->nr_cached; i++) {
> >+		page = &dev->cached_guest_pages[i];
> >+		if (gpa >= page->guest_phys_addr &&
> >+			gpa + size < page->guest_phys_addr + page->size) {
> >+			return gpa - page->guest_phys_addr +
> >+				page->host_phys_addr;
> >+		}
> >+	}
> >+
> > 	for (i = 0; i < dev->nr_guest_pages; i++) {
> > 		page = &dev->guest_pages[i];
> >
> > 		if (gpa >= page->guest_phys_addr &&
> > 		    gpa + size < page->guest_phys_addr + page->size) {
> >+			rte_memcpy(&dev->cached_guest_pages[dev-
> >nr_cached],
> >+				   page, sizeof(struct guest_page));
> >+			dev->nr_cached++;
> > 			return gpa - page->guest_phys_addr +
> > 			       page->host_phys_addr;
> > 		}
> >diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >index bd1be0104..573e99066 100644
> >--- a/lib/librte_vhost/vhost_user.c
> >+++ b/lib/librte_vhost/vhost_user.c
> >@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> > 	}
> >
> > 	free(dev->guest_pages);
> >+	free(dev->cached_guest_pages);
> > 	dev->guest_pages = NULL;
> >+	dev->cached_guest_pages = NULL;
> >
> > 	if (dev->log_addr) {
> > 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> >@@ -905,7 +907,10 @@ add_one_guest_page(struct virtio_net *dev,
> uint64_t guest_phys_addr,
> > 		old_pages = dev->guest_pages;
> > 		dev->guest_pages = realloc(dev->guest_pages,
> > 					dev->max_guest_pages *
> sizeof(*page));
> >-		if (!dev->guest_pages) {
> >+		dev->cached_guest_pages = realloc(dev-
> >cached_guest_pages,
> >+					dev->max_guest_pages *
> sizeof(*page));
> >+		dev->nr_cached = 0;
> >+		if (!dev->guest_pages || !dev->cached_guest_pages) {
> 
> Better to compare pointer to NULL according to DPDK's coding style.
> 
 OK, will change it.
> > 			VHOST_LOG_CONFIG(ERR, "cannot realloc
> guest_pages\n");
> > 			free(old_pages);
> > 			return -1;
> >@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
> > 		}
> > 	}
> >
> 
> Do we need initialize dev->nr_cached to 0 explicitly here?
> 

Structure vhost_virtqueue has been cleared in function init_vring_queue, it is not needed to do initialization in other place.

> >+	if (!dev->cached_guest_pages) {
> >+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
> >+						sizeof(struct guest_page));
> 
> I'm wondering why use malloc/realloc/free for cached_guest_pages instead
> of DPDK
> memory allocator APIs, I can see current code uses malloc/realloc/free for
> guest_pages,
> Is there any history reason I don't know?
> 

I don't think there's specific reason to use glibc malloc/realloc functions. 
History may be lost since code was added few years ago. I will changed these functions to dpdk API in next version.

> Thanks,
> Xiaolong
> 
> >+		if (dev->cached_guest_pages == NULL) {
> >+			VHOST_LOG_CONFIG(ERR,
> >+				"(%d) failed to allocate memory "
> >+				"for dev->cached_guest_pages\n",
> >+				dev->vid);
> >+			return RTE_VHOST_MSG_RESULT_ERR;
> >+		}
> >+	}
> >+
> > 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> rte_vhost_memory) +
> > 		sizeof(struct rte_vhost_mem_region) * memory->nregions,
> 0);
> > 	if (dev->mem == NULL) {
> >--
> >2.17.1
> >
  

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 2087d1400..de2c09e7e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -368,7 +368,9 @@  struct virtio_net {
 	struct vhost_device_ops const *notify_ops;
 
 	uint32_t		nr_guest_pages;
+	uint32_t		nr_cached;
 	uint32_t		max_guest_pages;
+	struct guest_page       *cached_guest_pages;
 	struct guest_page       *guest_pages;
 
 	int			slave_req_fd;
@@ -554,11 +556,23 @@  gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 	uint32_t i;
 	struct guest_page *page;
 
+	for (i = 0; i < dev->nr_cached; i++) {
+		page = &dev->cached_guest_pages[i];
+		if (gpa >= page->guest_phys_addr &&
+			gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+				page->host_phys_addr;
+		}
+	}
+
 	for (i = 0; i < dev->nr_guest_pages; i++) {
 		page = &dev->guest_pages[i];
 
 		if (gpa >= page->guest_phys_addr &&
 		    gpa + size < page->guest_phys_addr + page->size) {
+			rte_memcpy(&dev->cached_guest_pages[dev->nr_cached],
+				   page, sizeof(struct guest_page));
+			dev->nr_cached++;
 			return gpa - page->guest_phys_addr +
 			       page->host_phys_addr;
 		}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..573e99066 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -192,7 +192,9 @@  vhost_backend_cleanup(struct virtio_net *dev)
 	}
 
 	free(dev->guest_pages);
+	free(dev->cached_guest_pages);
 	dev->guest_pages = NULL;
+	dev->cached_guest_pages = NULL;
 
 	if (dev->log_addr) {
 		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
@@ -905,7 +907,10 @@  add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
 		old_pages = dev->guest_pages;
 		dev->guest_pages = realloc(dev->guest_pages,
 					dev->max_guest_pages * sizeof(*page));
-		if (!dev->guest_pages) {
+		dev->cached_guest_pages = realloc(dev->cached_guest_pages,
+					dev->max_guest_pages * sizeof(*page));
+		dev->nr_cached = 0;
+		if (!dev->guest_pages || !dev->cached_guest_pages) {
 			VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
 			free(old_pages);
 			return -1;
@@ -1075,6 +1080,18 @@  vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
 		}
 	}
 
+	if (!dev->cached_guest_pages) {
+		dev->cached_guest_pages = malloc(dev->max_guest_pages *
+						sizeof(struct guest_page));
+		if (dev->cached_guest_pages == NULL) {
+			VHOST_LOG_CONFIG(ERR,
+				"(%d) failed to allocate memory "
+				"for dev->cached_guest_pages\n",
+				dev->vid);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+	}
+
 	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
 		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
 	if (dev->mem == NULL) {