[v3,05/15] vdpa/ifc: add blk dev sw live migration

Message ID: 1643425417-215270-6-git-send-email-andy.pei@intel.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Maxime Coquelin
Series: add virtio_blk device support to vdpa/ifc

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Pei, Andy Jan. 29, 2022, 3:03 a.m. UTC
  Enable virtio blk sw live migration relay callfd and log the dirty page.
In this version we ignore the write cmd and still mark it dirty.

Signed-off-by: Andy Pei <andy.pei@intel.com>
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 116 insertions(+), 22 deletions(-)
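
For context on the mechanism: IFCVF hardware logs dirty pages only for the
packet/data buffers, so during live migration software must additionally mark
each used ring dirty once the device stops (see the comments in the patch
below). The following is a minimal sketch of that assist using the public
vhost API the patch relies on; sw_log_used_rings is a hypothetical helper
name, and the ifcvf_hw fields and IFCVF_USED_RING_LEN() macro are the ones
the patch itself uses:

#include <rte_vhost.h>

/* Sketch only: after the device stops, mark every used ring dirty so
 * the destination resumes with consistent ring state. Assumes the
 * ifcvf_hw layout and IFCVF_USED_RING_LEN() from the patch below.
 */
static void
sw_log_used_rings(int vid, struct ifcvf_hw *hw)
{
	uint64_t features = 0;
	uint64_t len;
	int i;

	rte_vhost_get_negotiated_features(vid, &features);
	if (!RTE_VHOST_NEED_LOG(features))
		return;	/* front-end did not negotiate dirty logging */

	for (i = 0; i < hw->nr_vring; i++) {
		len = IFCVF_USED_RING_LEN(hw->vring[i].size);
		/* log bytes [0, len) of used ring i as dirty */
		rte_vhost_log_used_vring(vid, i, 0, len);
	}
}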
  

Comments

Maxime Coquelin March 22, 2022, 11:10 a.m. UTC | #1
Hi Andy,

"vdpa/ifc: add block device SW live-migration"

On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.

Please try to make the above sentence simpler. Also, it seems that the
patch below changes behaviour for net devices, so the commit message
should explain that.

> In this version we ignore the write cmd and still mark it dirty.
> 
> Signed-off-by: Andy Pei <andy.pei@intel.com>
  
Pei, Andy March 23, 2022, 9:08 a.m. UTC | #2
Hi Maxime,

Thanks for your reply; my replies are inline.

-----Original Message-----
From: Maxime Coquelin <maxime.coquelin@redhat.com> 
Sent: Tuesday, March 22, 2022 7:10 PM
To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
Cc: Xia, Chenbo <chenbo.xia@intel.com>; Cao, Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
Subject: Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration

Hi Andy,

"vdpa/ifc: add block device SW live-migration"

On 1/29/22 04:03, Andy Pei wrote:
> Enable virtio blk sw live migration relay callfd and log the dirty page.

Please try to make the above sentence simpler. Also, it seems that the patch below changes behaviour for net devices, so the commit message should explain that.

Andy: Sure, I think it is better to send out a new patch set,
using a simpler commit log and reworking the code to make sure it does not
change the behavior of net devices.


Patch

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@ 
 	IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
 	struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@ 
 	return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
 	u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@  struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4f99bb3..a930825 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@  struct rte_vdpa_dev_info {
 
 	rte_vhost_get_negotiated_features(vid, &features);
 	if (RTE_VHOST_NEED_LOG(features)) {
-		ifcvf_disable_logging(hw);
-		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-				log_base, IFCVF_LOG_BASE, log_size);
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
+		/* IFCVF marks dirty memory pages for only packet buffer,
+		 * SW helps to mark the used ring as dirty after device stops.
+		 */
+		for (i = 0; i < hw->nr_vring; i++) {
+			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+			rte_vhost_log_used_vring(vid, i, 0, len);
+		}
+	}
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
+	int i, vid;
+	uint64_t features = 0;
+	uint64_t log_base = 0, log_size = 0;
+	uint64_t len;
+
+	vid = internal->vid;
+
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
+	ifcvf_hw_disable(hw);
+
+	for (i = 0; i < hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+				hw->vring[i].last_used_idx);
+
+	if (internal->sw_lm)
+		return;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+	if (RTE_VHOST_NEED_LOG(features)) {
+		if (internal->device_type == IFCVF_NET) {
+			ifcvf_disable_logging(hw);
+			rte_vhost_get_log_base(internal->vid, &log_base,
+				&log_size);
+			rte_vfio_container_dma_unmap(
+				internal->vfio_container_fd, log_base,
+				IFCVF_LOG_BASE, log_size);
+		}
 		/*
 		 * IFCVF marks dirty memory pages for only packet buffer,
 		 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@  struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((i & 1) && (internal->device_type == IFCVF_NET)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
 				return -1;
 			}
 			hw->vring[i].used = gpa;
-		} else {
+		} else if (internal->device_type == IFCVF_BLK) {
 			hw->vring[i].used = m_vring_iova +
 				(char *)internal->m_vring[i].used -
 				(char *)internal->m_vring[i].desc;
@@ -688,7 +747,10 @@  struct rte_vdpa_dev_info {
 	}
 	hw->nr_vring = nr_vring;
 
-	return ifcvf_start_hw(&internal->hw);
+	if (internal->device_type == IFCVF_NET)
+		return ifcvf_start_hw(&internal->hw);
+	else if (internal->device_type == IFCVF_BLK)
+		return ifcvf_hw_enable(&internal->hw);
 
 error:
 	for (i = 0; i < nr_vring; i++)
@@ -713,8 +775,10 @@  struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (((i & 1) == 0 && internal->device_type == IFCVF_NET) ||
+		     internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -726,6 +790,8 @@  struct rte_vdpa_dev_info {
 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
 			m_vring_iova, size);
 
+		hw->vring[i].last_avail_idx = vq.used->idx;
+		hw->vring[i].last_used_idx = vq.used->idx;
 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
 				hw->vring[i].last_used_idx);
 		rte_free(internal->m_vring[i].desc);
@@ -776,17 +842,36 @@  struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
-		ev.events = EPOLLIN | EPOLLPRI;
-		/* leave a flag to mark it's for interrupt */
-		ev.data.u64 = 1 | qid << 1 |
-			(uint64_t)internal->intr_fd[qid] << 32;
-		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
-				< 0) {
-			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
-			return NULL;
+	if (internal->device_type == IFCVF_NET) {
+		for (qid = 0; qid < q_num; qid += 2) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
+		}
+	} else if (internal->device_type == IFCVF_BLK) {
+		for (qid = 0; qid < q_num; qid += 1) {
+			ev.events = EPOLLIN | EPOLLPRI;
+			/* leave a flag to mark it's for interrupt */
+			ev.data.u64 = 1 | qid << 1 |
+				(uint64_t)internal->intr_fd[qid] << 32;
+			if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+				      internal->intr_fd[qid], &ev)
+					< 0) {
+				DRV_LOG(ERR, "epoll add error: %s",
+					strerror(errno));
+				return NULL;
+			}
+			update_used_ring(internal, qid);
 		}
-		update_used_ring(internal, qid);
 	}
 
 	/* start relay with a first kick */
@@ -874,7 +959,10 @@  struct rte_vdpa_dev_info {
 
 	/* stop the direct IO data path */
 	unset_notify_relay(internal);
-	vdpa_ifcvf_stop(internal);
+	if (internal->device_type == IFCVF_NET)
+		vdpa_ifcvf_stop(internal);
+	else if (internal->device_type == IFCVF_BLK)
+		vdpa_ifcvf_blk_pause(internal);
 	vdpa_disable_vfio_intr(internal);
 
 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
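
Two mechanisms in the patch are worth unpacking. First, the block pause path:
a block request must not be dropped mid-flight, so before disabling the
hardware the driver keeps kicking each queue until the used index catches up
with the available index, then reports the final ring indexes back to vhost.
Below is a condensed sketch of that flow (a hypothetical standalone helper,
assuming the same DPDK vhost calls and ifcvf_hw layout as the patch):

#include <unistd.h>
#include <rte_vhost.h>

static void
blk_drain_and_pause(struct ifcvf_internal *internal)
{
	struct ifcvf_hw *hw = &internal->hw;
	struct rte_vhost_vring vq;
	int i, vid = internal->vid;

	for (i = 0; i < hw->nr_vring; i++) {
		rte_vhost_get_vhost_vring(vid, i, &vq);
		/* Kick the queue until the device consumes every available
		 * descriptor, i.e. used->idx catches up with avail->idx.
		 */
		while (vq.avail->idx != vq.used->idx) {
			ifcvf_notify_queue(hw, i);
			usleep(10);	/* poll every 10 us */
		}
		hw->vring[i].last_avail_idx = vq.avail->idx;
		hw->vring[i].last_used_idx = vq.used->idx;
	}

	ifcvf_hw_disable(hw);

	/* Hand the final ring indexes back to vhost so the destination
	 * resumes exactly where the source stopped.
	 */
	for (i = 0; i < hw->nr_vring; i++)
		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
				hw->vring[i].last_used_idx);
}

Second, the notify-relay thread packs three fields into epoll's 64-bit user
data word (the "leave a flag to mark it's for interrupt" comment): bit 0
flags an interrupt eventfd, bits 1-31 carry the queue id, and bits 32-63
carry the fd that fired. A decoding sketch mirroring that encoding (again a
hypothetical helper, not part of the patch):

static inline void
decode_relay_event(uint64_t data, int *is_intr, uint32_t *qid, int *fd)
{
	*is_intr = data & 1;		/* bit 0: interrupt-fd marker */
	*qid = ((uint32_t)data) >> 1;	/* bits 1..31: queue id */
	*fd = (int)(data >> 32);	/* bits 32..63: eventfd */
}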