[dpdk-dev,2/4] vhost: introduce vhost_log_write

Message ID 1449027793-30975-3-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Superseded, archived

Commit Message

Yuanhan Liu Dec. 2, 2015, 3:43 a.m. UTC
Introduce the vhost_log_write() helper function to log the dirty pages we
touch. The page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
page is represented by 1 bit in the log.

Therefore, vhost_log_write() simply finds the right bit for the page we
are about to change and sets it to 1. dev->log_base denotes the start of
the dirty page bitmap.

The page address is biased by log_guest_addr, which is derived from the
SET_VRING_ADDR request as part of the vring-related addresses.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 lib/librte_vhost/rte_virtio_net.h | 34 ++++++++++++++++++++++++++++++++++
 lib/librte_vhost/virtio-net.c     |  4 ++++
 2 files changed, 38 insertions(+)
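
In essence, the logging described in the commit message boils down to one
bit per 4 KB page of guest memory. As a standalone illustration (the helper
name log_dirty_range and the loop shape below are only a sketch, not code
taken verbatim from the patch):

	#include <stdint.h>

	#define VHOST_LOG_PAGE	4096	/* one log bit covers 4 KB of guest memory */

	/* Sketch: mark every 4 KB page covered by [addr, addr + len) as dirty. */
	static void
	log_dirty_range(uint8_t *log_base, uint64_t addr, uint64_t len)
	{
		uint64_t page, last;

		if (len == 0)
			return;

		page = addr / VHOST_LOG_PAGE;			/* first page touched */
		last = (addr + len - 1) / VHOST_LOG_PAGE;	/* last page touched */

		for (; page <= last; page++)
			log_base[page / 8] |= 1 << (page % 8);	/* byte = page / 8, bit = page % 8 */
	}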
  

Comments

Victor Kaplansky Dec. 2, 2015, 1:53 p.m. UTC | #1
On Wed, Dec 02, 2015 at 11:43:11AM +0800, Yuanhan Liu wrote:
> Introduce the vhost_log_write() helper function to log the dirty pages we
> touch. The page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
> page is represented by 1 bit in the log.
> 
> Therefore, vhost_log_write() simply finds the right bit for the page we
> are about to change and sets it to 1. dev->log_base denotes the start of
> the dirty page bitmap.
> 
> The page address is biased by log_guest_addr, which is derived from the
> SET_VRING_ADDR request as part of the vring-related addresses.
> 
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>  lib/librte_vhost/rte_virtio_net.h | 34 ++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/virtio-net.c     |  4 ++++
>  2 files changed, 38 insertions(+)
> 
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index 416dac2..191c1be 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -40,6 +40,7 @@
>   */
>  
>  #include <stdint.h>
> +#include <linux/vhost.h>
>  #include <linux/virtio_ring.h>
>  #include <linux/virtio_net.h>
>  #include <sys/eventfd.h>
> @@ -59,6 +60,8 @@ struct rte_mbuf;
>  /* Backend value set by guest. */
>  #define VIRTIO_DEV_STOPPED -1
>  
> +#define VHOST_LOG_PAGE	4096
> +
>  
>  /* Enum for virtqueue management. */
>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
> @@ -82,6 +85,7 @@ struct vhost_virtqueue {
>  	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
>  	struct vring_avail	*avail;			/**< Virtqueue available ring. */
>  	struct vring_used	*used;			/**< Virtqueue used ring. */
> +	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
>  	uint32_t		size;			/**< Size of descriptor ring. */
>  	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
>  	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
> @@ -203,6 +207,36 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
>  	return vhost_va;
>  }
>  
> +static inline void __attribute__((always_inline))
> +vhost_log_page(uint8_t *log_base, uint64_t page)
> +{
> +	/* TODO: to make it atomic? */
> +	log_base[page / 8] |= 1 << (page % 8);

I think the atomic OR operation is necessary only if there can be
more than one vhost-user back-end updating the guest's memory
simultaneously. However, it is probably safe to perform a
regular OR operation, since the rings are not shared between
back-ends. What about the buffers pointed to by descriptors? To be
on the safe side, I would use the GCC built-in function
__sync_fetch_and_or().
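For example, just as a sketch of what I mean, using the same operands:

	__sync_fetch_and_or(&log_base[page / 8], 1 << (page % 8));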

> +}
> +
> +static inline void __attribute__((always_inline))
> +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		uint64_t offset, uint64_t len)
> +{
> +	uint64_t addr = vq->log_guest_addr;
> +	uint64_t page;
> +
> +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> +		     !dev->log_base || !len))
> +		return;

Isn't "likely" more appropriate in above, since the whole
expression is expected to be true most of the time?

> +
> +	addr += offset;
> +	if (dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8))
> +		return;
> +
> +	page = addr / VHOST_LOG_PAGE;
> +	while (page * VHOST_LOG_PAGE < addr + len) {
> +		vhost_log_page(dev->log_base, page);
> +		page += VHOST_LOG_PAGE;
> +	}
> +}
> +
> +
>  /**
>   *  Disable features in feature_mask. Returns 0 on success.
>   */
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 8364938..4481827 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -666,12 +666,16 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
>  		return -1;
>  	}
>  
> +	vq->log_guest_addr = addr->log_guest_addr;
> +
>  	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n",
>  			dev->device_fh, vq->desc);
>  	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n",
>  			dev->device_fh, vq->avail);
>  	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n",
>  			dev->device_fh, vq->used);
> +	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %p\n",
> +			dev->device_fh, (void *)(uintptr_t)vq->log_guest_addr);
>  
>  	return 0;
>  }
> -- 
> 1.9.0
  
Yuanhan Liu Dec. 2, 2015, 2:39 p.m. UTC | #2
On Wed, Dec 02, 2015 at 03:53:01PM +0200, Victor Kaplansky wrote:
> On Wed, Dec 02, 2015 at 11:43:11AM +0800, Yuanhan Liu wrote:
> > Introduce the vhost_log_write() helper function to log the dirty pages we
> > touch. The page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
> > page is represented by 1 bit in the log.
> > 
> > Therefore, vhost_log_write() simply finds the right bit for the page we
> > are about to change and sets it to 1. dev->log_base denotes the start of
> > the dirty page bitmap.
> > 
> > The page address is biased by log_guest_addr, which is derived from the
> > SET_VRING_ADDR request as part of the vring-related addresses.
> > 
> > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > ---
> >  lib/librte_vhost/rte_virtio_net.h | 34 ++++++++++++++++++++++++++++++++++
> >  lib/librte_vhost/virtio-net.c     |  4 ++++
> >  2 files changed, 38 insertions(+)
> > 
> > diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> > index 416dac2..191c1be 100644
> > --- a/lib/librte_vhost/rte_virtio_net.h
> > +++ b/lib/librte_vhost/rte_virtio_net.h
> > @@ -40,6 +40,7 @@
> >   */
> >  
> >  #include <stdint.h>
> > +#include <linux/vhost.h>
> >  #include <linux/virtio_ring.h>
> >  #include <linux/virtio_net.h>
> >  #include <sys/eventfd.h>
> > @@ -59,6 +60,8 @@ struct rte_mbuf;
> >  /* Backend value set by guest. */
> >  #define VIRTIO_DEV_STOPPED -1
> >  
> > +#define VHOST_LOG_PAGE	4096
> > +
> >  
> >  /* Enum for virtqueue management. */
> >  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
> > @@ -82,6 +85,7 @@ struct vhost_virtqueue {
> >  	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
> >  	struct vring_avail	*avail;			/**< Virtqueue available ring. */
> >  	struct vring_used	*used;			/**< Virtqueue used ring. */
> > +	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
> >  	uint32_t		size;			/**< Size of descriptor ring. */
> >  	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
> >  	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
> > @@ -203,6 +207,36 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
> >  	return vhost_va;
> >  }
> >  
> > +static inline void __attribute__((always_inline))
> > +vhost_log_page(uint8_t *log_base, uint64_t page)
> > +{
> > +	/* TODO: to make it atomic? */
> > +	log_base[page / 8] |= 1 << (page % 8);
> 
> I think the atomic OR operation is necessary only if there can be
> more than one vhost-user back-end updating the guest's memory
> simultaneously. However probably it is pretty safe to perform
> regular OR operation, since rings are not shared between
> back-end. What about buffers pointed by descriptors?  To be on
> the safe side, I would use a GCC built-in function
> __sync_fetch_and_or(). 

The build has to pass not only with gcc, but also with icc and clang.

> 
> > +}
> > +
> > +static inline void __attribute__((always_inline))
> > +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > +		uint64_t offset, uint64_t len)
> > +{
> > +	uint64_t addr = vq->log_guest_addr;
> > +	uint64_t page;
> > +
> > +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> > +		     !dev->log_base || !len))
> > +		return;
> 
> Isn't "likely" more appropriate in above, since the whole
> expression is expected to be true most of the time?

Sorry, it's a typo, and thanks for catching it.

	--yliu
  
Huawei Xie Dec. 9, 2015, 3:33 a.m. UTC | #3
On 12/2/2015 9:53 PM, Victor Kaplansky wrote:
> On Wed, Dec 02, 2015 at 11:43:11AM +0800, Yuanhan Liu wrote:
>> Introduce the vhost_log_write() helper function to log the dirty pages we
>> touch. The page size is hard-coded to 4096 (VHOST_LOG_PAGE), and each
>> page is represented by 1 bit in the log.
>>
>> Therefore, vhost_log_write() simply finds the right bit for the page we
>> are about to change and sets it to 1. dev->log_base denotes the start of
>> the dirty page bitmap.
>>
>> The page address is biased by log_guest_addr, which is derived from the
>> SET_VRING_ADDR request as part of the vring-related addresses.
>>
>> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
>> ---
>>  lib/librte_vhost/rte_virtio_net.h | 34 ++++++++++++++++++++++++++++++++++
>>  lib/librte_vhost/virtio-net.c     |  4 ++++
>>  2 files changed, 38 insertions(+)
>>
>> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
>> index 416dac2..191c1be 100644
>> --- a/lib/librte_vhost/rte_virtio_net.h
>> +++ b/lib/librte_vhost/rte_virtio_net.h
>> @@ -40,6 +40,7 @@
>>   */
>>  
>>  #include <stdint.h>
>> +#include <linux/vhost.h>
>>  #include <linux/virtio_ring.h>
>>  #include <linux/virtio_net.h>
>>  #include <sys/eventfd.h>
>> @@ -59,6 +60,8 @@ struct rte_mbuf;
>>  /* Backend value set by guest. */
>>  #define VIRTIO_DEV_STOPPED -1
>>  
>> +#define VHOST_LOG_PAGE	4096
>> +
>>  
>>  /* Enum for virtqueue management. */
>>  enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
>> @@ -82,6 +85,7 @@ struct vhost_virtqueue {
>>  	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
>>  	struct vring_avail	*avail;			/**< Virtqueue available ring. */
>>  	struct vring_used	*used;			/**< Virtqueue used ring. */
>> +	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
>>  	uint32_t		size;			/**< Size of descriptor ring. */
>>  	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
>>  	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
>> @@ -203,6 +207,36 @@ gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
>>  	return vhost_va;
>>  }
>>  
>> +static inline void __attribute__((always_inline))
>> +vhost_log_page(uint8_t *log_base, uint64_t page)
>> +{
>> +	/* TODO: to make it atomic? */
>> +	log_base[page / 8] |= 1 << (page % 8);
> I think the atomic OR operation is necessary only if there can be
> more than one vhost-user back-end updating the guest's memory
> simultaneously. However probably it is pretty safe to perform
> regular OR operation, since rings are not shared between
> back-end. What about buffers pointed by descriptors?  To be on
> the safe side, I would use a GCC built-in function
> __sync_fetch_and_or(). 
>
>> +}
>> +
>> +static inline void __attribute__((always_inline))
>> +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> +		uint64_t offset, uint64_t len)
>> +{
>> +	uint64_t addr = vq->log_guest_addr;
>> +	uint64_t page;
>> +
>> +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
>> +		     !dev->log_base || !len))
>> +		return;
> Isn't "likely" more appropriate in above, since the whole
> expression is expected to be true most of the time?
Victor:
So we are not always logging; what is the message that tells the backend
that the migration has started?
[...]
  
Yuanhan Liu Dec. 9, 2015, 3:42 a.m. UTC | #4
On Wed, Dec 09, 2015 at 03:33:16AM +0000, Xie, Huawei wrote:
...
> >> +static inline void __attribute__((always_inline))
> >> +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> >> +		uint64_t offset, uint64_t len)
> >> +{
> >> +	uint64_t addr = vq->log_guest_addr;
> >> +	uint64_t page;
> >> +
> >> +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> >> +		     !dev->log_base || !len))
> >> +		return;
> > Isn't "likely" more appropriate in above, since the whole
> > expression is expected to be true most of the time?
> Victor:
> So we are not always logging, what is the message that tells the backend
> the migration is started?

When logging starts, a VHOST_USER_SET_FEATURES request will be sent again,
with the VHOST_F_LOG_ALL feature bit set.

	--yliu
  
Huawei Xie Dec. 9, 2015, 5:44 a.m. UTC | #5
On 12/9/2015 11:41 AM, Yuanhan Liu wrote:
> On Wed, Dec 09, 2015 at 03:33:16AM +0000, Xie, Huawei wrote:
> ...
>>>> +static inline void __attribute__((always_inline))
>>>> +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
>>>> +		uint64_t offset, uint64_t len)
>>>> +{
>>>> +	uint64_t addr = vq->log_guest_addr;
>>>> +	uint64_t page;
>>>> +
>>>> +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
>>>> +		     !dev->log_base || !len))
>>>> +		return;
>>> Isn't "likely" more appropriate in above, since the whole
>>> expression is expected to be true most of the time?
>> Victor:
>> So we are not always logging, what is the message that tells the backend
>> the migration is started?
> When log starts, VHOST_USER_SET_FEATURES request will be sent again,
> with VHOST_F_LOG_ALL feature bit set.
As the VHOST_USER_SET_FEATURES handling and rx/tx run asynchronously,
we have to make sure we don't miss logging anything when this feature is
set. For example, I have a doubt: in virtio_dev_rx, is dev->features
volatile?
> 	--yliu
>
  
Yuanhan Liu Dec. 9, 2015, 8:41 a.m. UTC | #6
On Wed, Dec 09, 2015 at 05:44:11AM +0000, Xie, Huawei wrote:
> On 12/9/2015 11:41 AM, Yuanhan Liu wrote:
> > On Wed, Dec 09, 2015 at 03:33:16AM +0000, Xie, Huawei wrote:
> > ...
> >>>> +static inline void __attribute__((always_inline))
> >>>> +vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> >>>> +		uint64_t offset, uint64_t len)
> >>>> +{
> >>>> +	uint64_t addr = vq->log_guest_addr;
> >>>> +	uint64_t page;
> >>>> +
> >>>> +	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
> >>>> +		     !dev->log_base || !len))
> >>>> +		return;
> >>> Isn't "likely" more appropriate in above, since the whole
> >>> expression is expected to be true most of the time?
> >> Victor:
> >> So we are not always logging, what is the message that tells the backend
> >> the migration is started?
> > When log starts, VHOST_USER_SET_FEATURES request will be sent again,
> > with VHOST_F_LOG_ALL feature bit set.
> As the VHOST_USER_SET_FEATURES handling and rx/tx runs asynchronously,
> we have to make sure we don't miss logging anything when this feature is
> set.

That's a good reminder. Thanks.

> For example, I doubt like in virtio_dev_rx, is the dev->features
> volatile?

No, it is not volatile.

	--yliu
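
A sketch of one way to make that read explicit, using the GCC/clang __atomic
builtins (illustrative only, not something proposed in this patch or thread):

	/* Sketch: read the feature bits once with acquire semantics, so a
	 * VHOST_F_LOG_ALL update made by the message-handling thread is seen. */
	uint64_t features = __atomic_load_n(&dev->features, __ATOMIC_ACQUIRE);

	if ((features & (1ULL << VHOST_F_LOG_ALL)) == 0)
		return;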
  

Patch

diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 416dac2..191c1be 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -40,6 +40,7 @@ 
  */
 
 #include <stdint.h>
+#include <linux/vhost.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_net.h>
 #include <sys/eventfd.h>
@@ -59,6 +60,8 @@  struct rte_mbuf;
 /* Backend value set by guest. */
 #define VIRTIO_DEV_STOPPED -1
 
+#define VHOST_LOG_PAGE	4096
+
 
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -82,6 +85,7 @@  struct vhost_virtqueue {
 	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
 	struct vring_avail	*avail;			/**< Virtqueue available ring. */
 	struct vring_used	*used;			/**< Virtqueue used ring. */
+	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
 	uint32_t		size;			/**< Size of descriptor ring. */
 	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
 	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
@@ -203,6 +207,36 @@  gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
 	return vhost_va;
 }
 
+static inline void __attribute__((always_inline))
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+	/* TODO: to make it atomic? */
+	log_base[page / 8] |= 1 << (page % 8);
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint64_t offset, uint64_t len)
+{
+	uint64_t addr = vq->log_guest_addr;
+	uint64_t page;
+
+	if (unlikely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		     !dev->log_base || !len))
+		return;
+
+	addr += offset;
+	if (dev->log_size < ((addr + len - 1) / VHOST_LOG_PAGE / 8))
+		return;
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_page(dev->log_base, page);
+		page += VHOST_LOG_PAGE;
+	}
+}
+
+
 /**
  *  Disable features in feature_mask. Returns 0 on success.
  */
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 8364938..4481827 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -666,12 +666,16 @@  set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 		return -1;
 	}
 
+	vq->log_guest_addr = addr->log_guest_addr;
+
 	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n",
 			dev->device_fh, vq->desc);
 	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n",
 			dev->device_fh, vq->avail);
 	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n",
 			dev->device_fh, vq->used);
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %p\n",
+			dev->device_fh, (void *)(uintptr_t)vq->log_guest_addr);
 
 	return 0;
 }
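
For context, a hypothetical call site in the data path (the real callers are
presumably added by later patches of this series; used_idx below is
illustrative) would log the used-ring bytes it has just written, since the
offset passed to vhost_log_write() is relative to log_guest_addr, i.e. to
the start of the used ring:

	#include <stddef.h>	/* offsetof */

	/* Sketch: log the used ring entry written at index used_idx ... */
	vhost_log_write(dev, vq,
			offsetof(struct vring_used, ring[used_idx]),
			sizeof(struct vring_used_elem));
	/* ... and the used->idx update itself. */
	vhost_log_write(dev, vq,
			offsetof(struct vring_used, idx),
			sizeof(vq->used->idx));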