[v6,06/16] vdpa/ifc: add block device SW live-migration
Checks
Commit Message
Add SW live-migration support to block device.
Add dirty page logging to block device.
Signed-off-by: Andy Pei <andy.pei@intel.com>
---
drivers/vdpa/ifc/base/ifcvf.c | 4 +-
drivers/vdpa/ifc/base/ifcvf.h | 6 ++
drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++-------
3 files changed, 115 insertions(+), 23 deletions(-)
Comments
> -----Original Message-----
> From: Pei, Andy <andy.pei@intel.com>
> Sent: Thursday, April 21, 2022 4:34 PM
> To: dev@dpdk.org
> Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com; Cao,
> Gang <gang.cao@intel.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
>
> Add SW live-migration support to block device.
> Add dirty page logging to block device.
Add SW live-migration support including dirty page logging for block device.
>
> Signed-off-by: Andy Pei <andy.pei@intel.com>
> ---
> drivers/vdpa/ifc/base/ifcvf.c | 4 +-
> drivers/vdpa/ifc/base/ifcvf.h | 6 ++
> drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++++++++++++++++++++++++++++++++++--
> -----
> 3 files changed, 115 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
> index d10c1fd..e417c50 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.c
> +++ b/drivers/vdpa/ifc/base/ifcvf.c
> @@ -191,7 +191,7 @@
> IFCVF_WRITE_REG32(val >> 32, hi);
> }
>
> -STATIC int
> +int
> ifcvf_hw_enable(struct ifcvf_hw *hw)
> {
> struct ifcvf_pci_common_cfg *cfg;
> @@ -240,7 +240,7 @@
> return 0;
> }
>
> -STATIC void
> +void
> ifcvf_hw_disable(struct ifcvf_hw *hw)
> {
> u32 i;
> diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
> index 769c603..6dd7925 100644
> --- a/drivers/vdpa/ifc/base/ifcvf.h
> +++ b/drivers/vdpa/ifc/base/ifcvf.h
> @@ -179,4 +179,10 @@ struct ifcvf_hw {
> u64
> ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
>
> +int
> +ifcvf_hw_enable(struct ifcvf_hw *hw);
> +
> +void
> +ifcvf_hw_disable(struct ifcvf_hw *hw);
> +
> #endif /* _IFCVF_H_ */
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 8d104b7..a23dc2d 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
> }
> }
>
> +static void
> +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
> +{
> + struct ifcvf_hw *hw = &internal->hw;
> + struct rte_vhost_vring vq;
> + int i, vid;
> + uint64_t features = 0;
> + uint64_t log_base = 0, log_size = 0;
> + uint64_t len;
> +
> + vid = internal->vid;
> +
> + if (internal->device_type == IFCVF_BLK) {
> + for (i = 0; i < hw->nr_vring; i++) {
> + rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> + while (vq.avail->idx != vq.used->idx) {
> + ifcvf_notify_queue(hw, i);
> + usleep(10);
> + }
> + hw->vring[i].last_avail_idx = vq.avail->idx;
> + hw->vring[i].last_used_idx = vq.used->idx;
> + }
> + }
> +
> + ifcvf_hw_disable(hw);
> +
> + for (i = 0; i < hw->nr_vring; i++)
> + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> + hw->vring[i].last_used_idx);
> +
> + if (internal->sw_lm)
> + return;
> +
> + rte_vhost_get_negotiated_features(vid, &features);
> + if (RTE_VHOST_NEED_LOG(features)) {
> + ifcvf_disable_logging(hw);
> + rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> + rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> + log_base, IFCVF_LOG_BASE, log_size);
> + /*
> + * IFCVF marks dirty memory pages for only packet buffer,
> + * SW helps to mark the used ring as dirty after device stops.
> + */
> + for (i = 0; i < hw->nr_vring; i++) {
> + len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> + rte_vhost_log_used_vring(vid, i, 0, len);
> + }
> + }
> +}
Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop into one function and
checking the device type internally to do different things? As I see it, most of the
logic is the same.
> +
> #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
> static int
> @@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
> }
> hw->vring[i].avail = gpa;
>
> - /* Direct I/O for Tx queue, relay for Rx queue */
> - if (i & 1) {
> - gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> - if (gpa == 0) {
> - DRV_LOG(ERR, "Fail to get GPA for used ring.");
> - return -1;
> + if (internal->device_type == IFCVF_NET) {
> + /* Direct I/O for Tx queue, relay for Rx queue */
> + if (i & 1) {
> + gpa = hva_to_gpa(vid,
> (uint64_t)(uintptr_t)vq.used);
> + if (gpa == 0) {
> + DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> + return -1;
> + }
> + hw->vring[i].used = gpa;
> + } else {
> + hw->vring[i].used = m_vring_iova +
> + (char *)internal->m_vring[i].used -
> + (char *)internal->m_vring[i].desc;
> }
> - hw->vring[i].used = gpa;
> - } else {
> + } else if (internal->device_type == IFCVF_BLK) {
> + /* BLK: relay every queue */
> hw->vring[i].used = m_vring_iova +
> (char *)internal->m_vring[i].used -
> (char *)internal->m_vring[i].desc;
> @@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
> }
> hw->nr_vring = nr_vring;
>
> - return ifcvf_start_hw(&internal->hw);
> + if (internal->device_type == IFCVF_NET)
> + return ifcvf_start_hw(&internal->hw);
> + else if (internal->device_type == IFCVF_BLK)
> + return ifcvf_hw_enable(&internal->hw);
>
> error:
> for (i = 0; i < nr_vring; i++)
> @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
>
> for (i = 0; i < hw->nr_vring; i++) {
> /* synchronize remaining new used entries if any */
> - if ((i & 1) == 0)
> + if (internal->device_type == IFCVF_NET) {
> + if ((i & 1) == 0)
> + update_used_ring(internal, i);
> + } else if (internal->device_type == IFCVF_BLK) {
> update_used_ring(internal, i);
> + }
>
> rte_vhost_get_vhost_vring(vid, i, &vq);
> len = IFCVF_USED_RING_LEN(vq.size);
> @@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
> }
> }
>
> - for (qid = 0; qid < q_num; qid += 2) {
> - ev.events = EPOLLIN | EPOLLPRI;
> - /* leave a flag to mark it's for interrupt */
> - ev.data.u64 = 1 | qid << 1 |
> - (uint64_t)internal->intr_fd[qid] << 32;
> - if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
> - < 0) {
> - DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> - return NULL;
> + if (internal->device_type == IFCVF_NET) {
> + for (qid = 0; qid < q_num; qid += 2) {
> + ev.events = EPOLLIN | EPOLLPRI;
> + /* leave a flag to mark it's for interrupt */
> + ev.data.u64 = 1 | qid << 1 |
> + (uint64_t)internal->intr_fd[qid] << 32;
> + if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> + internal->intr_fd[qid], &ev)
> + < 0) {
> + DRV_LOG(ERR, "epoll add error: %s",
> + strerror(errno));
> + return NULL;
> + }
> + update_used_ring(internal, qid);
> + }
> + } else if (internal->device_type == IFCVF_BLK) {
> + for (qid = 0; qid < q_num; qid += 1) {
> + ev.events = EPOLLIN | EPOLLPRI;
> + /* leave a flag to mark it's for interrupt */
> + ev.data.u64 = 1 | qid << 1 |
> + (uint64_t)internal->intr_fd[qid] << 32;
> + if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> + internal->intr_fd[qid], &ev)
> + < 0) {
> + DRV_LOG(ERR, "epoll add error: %s",
> + strerror(errno));
> + return NULL;
> + }
> + update_used_ring(internal, qid);
It seems we can also reduce the duplicate code for the above case. As for the other checks,
where a single combined condition suffices, I prefer to use just one.
Thanks,
Chenbo
> }
> - update_used_ring(internal, qid);
> }
>
> /* start relay with a first kick */
> @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
>
> /* stop the direct IO data path */
> unset_notify_relay(internal);
> - vdpa_ifcvf_stop(internal);
> + if (internal->device_type == IFCVF_NET)
> + vdpa_ifcvf_stop(internal);
> + else if (internal->device_type == IFCVF_BLK)
> + vdpa_ifcvf_blk_pause(internal);
> vdpa_disable_vfio_intr(internal);
>
> ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
> --
> 1.8.3.1
Hi Chenbo,
Thanks for your reply.
My reply is inline.
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Monday, April 25, 2022 9:10 PM
> To: Pei, Andy <andy.pei@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; Cao, Gang <gang.cao@intel.com>; Liu,
> Changpeng <changpeng.liu@intel.com>
> Subject: RE: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
>
> > -----Original Message-----
> > From: Pei, Andy <andy.pei@intel.com>
> > Sent: Thursday, April 21, 2022 4:34 PM
> > To: dev@dpdk.org
> > Cc: Xia, Chenbo <chenbo.xia@intel.com>; maxime.coquelin@redhat.com;
> > Cao, Gang <gang.cao@intel.com>; Liu, Changpeng
> > <changpeng.liu@intel.com>
> > Subject: [PATCH v6 06/16] vdpa/ifc: add block device SW live-migration
> >
> > Add SW live-migration support to block device.
> > Add dirty page logging to block device.
>
> Add SW live-migration support including dirty page logging for block device.
>
Sure, I will remove "Add dirty page logging to block device." in the next version.
> >
> > Signed-off-by: Andy Pei <andy.pei@intel.com>
> > ---
> > drivers/vdpa/ifc/base/ifcvf.c | 4 +-
> > drivers/vdpa/ifc/base/ifcvf.h | 6 ++
> > drivers/vdpa/ifc/ifcvf_vdpa.c | 128
> > +++++++++++++++++++++++++++++++++++--
> > -----
> > 3 files changed, 115 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.c
> > b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..e417c50 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.c
> > +++ b/drivers/vdpa/ifc/base/ifcvf.c
> > @@ -191,7 +191,7 @@
> > IFCVF_WRITE_REG32(val >> 32, hi);
> > }
> >
> > -STATIC int
> > +int
> > ifcvf_hw_enable(struct ifcvf_hw *hw)
> > {
> > struct ifcvf_pci_common_cfg *cfg;
> > @@ -240,7 +240,7 @@
> > return 0;
> > }
> >
> > -STATIC void
> > +void
> > ifcvf_hw_disable(struct ifcvf_hw *hw) {
> > u32 i;
> > diff --git a/drivers/vdpa/ifc/base/ifcvf.h
> > b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644
> > --- a/drivers/vdpa/ifc/base/ifcvf.h
> > +++ b/drivers/vdpa/ifc/base/ifcvf.h
> > @@ -179,4 +179,10 @@ struct ifcvf_hw {
> > u64
> > ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
> >
> > +int
> > +ifcvf_hw_enable(struct ifcvf_hw *hw);
> > +
> > +void
> > +ifcvf_hw_disable(struct ifcvf_hw *hw);
> > +
> > #endif /* _IFCVF_H_ */
> > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8d104b7..a23dc2d 100644
> > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> > @@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
> > }
> > }
> >
> > +static void
> > +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) {
> > + struct ifcvf_hw *hw = &internal->hw;
> > + struct rte_vhost_vring vq;
> > + int i, vid;
> > + uint64_t features = 0;
> > + uint64_t log_base = 0, log_size = 0;
> > + uint64_t len;
> > +
> > + vid = internal->vid;
> > +
> > + if (internal->device_type == IFCVF_BLK) {
> > + for (i = 0; i < hw->nr_vring; i++) {
> > + rte_vhost_get_vhost_vring(internal->vid, i, &vq);
> > + while (vq.avail->idx != vq.used->idx) {
> > + ifcvf_notify_queue(hw, i);
> > + usleep(10);
> > + }
> > + hw->vring[i].last_avail_idx = vq.avail->idx;
> > + hw->vring[i].last_used_idx = vq.used->idx;
> > + }
> > + }
> > +
> > + ifcvf_hw_disable(hw);
> > +
> > + for (i = 0; i < hw->nr_vring; i++)
> > + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
> > + hw->vring[i].last_used_idx);
> > +
> > + if (internal->sw_lm)
> > + return;
> > +
> > + rte_vhost_get_negotiated_features(vid, &features);
> > + if (RTE_VHOST_NEED_LOG(features)) {
> > + ifcvf_disable_logging(hw);
> > + rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
> > + rte_vfio_container_dma_unmap(internal->vfio_container_fd,
> > + log_base, IFCVF_LOG_BASE, log_size);
> > + /*
> > + * IFCVF marks dirty memory pages for only packet buffer,
> > + * SW helps to mark the used ring as dirty after device stops.
> > + */
> > + for (i = 0; i < hw->nr_vring; i++) {
> > + len = IFCVF_USED_RING_LEN(hw->vring[i].size);
> > + rte_vhost_log_used_vring(vid, i, 0, len);
> > + }
> > + }
> > +}
>
> Can we consider combining vdpa_ifcvf_blk_pause and vdpa_ifcvf_stop to
> one function and check device type internally to do different things? Because
> as I see, most logic is the same.
>
OK, I will address it in the next version.
> > +
> > #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
> > sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1)) static int @@ -
> 659,15
> > +709,22 @@ struct rte_vdpa_dev_info {
> > }
> > hw->vring[i].avail = gpa;
> >
> > - /* Direct I/O for Tx queue, relay for Rx queue */
> > - if (i & 1) {
> > - gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
> > - if (gpa == 0) {
> > - DRV_LOG(ERR, "Fail to get GPA for used
> ring.");
> > - return -1;
> > + if (internal->device_type == IFCVF_NET) {
> > + /* Direct I/O for Tx queue, relay for Rx queue */
> > + if (i & 1) {
> > + gpa = hva_to_gpa(vid,
> > (uint64_t)(uintptr_t)vq.used);
> > + if (gpa == 0) {
> > + DRV_LOG(ERR, "Fail to get GPA for
> used
> > ring.");
> > + return -1;
> > + }
> > + hw->vring[i].used = gpa;
> > + } else {
> > + hw->vring[i].used = m_vring_iova +
> > + (char *)internal->m_vring[i].used -
> > + (char *)internal->m_vring[i].desc;
> > }
> > - hw->vring[i].used = gpa;
> > - } else {
> > + } else if (internal->device_type == IFCVF_BLK) {
> > + /* BLK: relay every queue */
> > hw->vring[i].used = m_vring_iova +
> > (char *)internal->m_vring[i].used -
> > (char *)internal->m_vring[i].desc; @@ -686,7
> +743,10 @@ struct
> > rte_vdpa_dev_info {
> > }
> > hw->nr_vring = nr_vring;
> >
> > - return ifcvf_start_hw(&internal->hw);
> > + if (internal->device_type == IFCVF_NET)
> > + return ifcvf_start_hw(&internal->hw);
> > + else if (internal->device_type == IFCVF_BLK)
> > + return ifcvf_hw_enable(&internal->hw);
> >
> > error:
> > for (i = 0; i < nr_vring; i++)
> > @@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
> >
> > for (i = 0; i < hw->nr_vring; i++) {
> > /* synchronize remaining new used entries if any */
> > - if ((i & 1) == 0)
> > + if (internal->device_type == IFCVF_NET) {
> > + if ((i & 1) == 0)
> > + update_used_ring(internal, i);
> > + } else if (internal->device_type == IFCVF_BLK) {
> > update_used_ring(internal, i);
> > + }
> >
> > rte_vhost_get_vhost_vring(vid, i, &vq);
> > len = IFCVF_USED_RING_LEN(vq.size); @@ -773,17 +837,36
> @@ struct
> > rte_vdpa_dev_info {
> > }
> > }
> >
> > - for (qid = 0; qid < q_num; qid += 2) {
> > - ev.events = EPOLLIN | EPOLLPRI;
> > - /* leave a flag to mark it's for interrupt */
> > - ev.data.u64 = 1 | qid << 1 |
> > - (uint64_t)internal->intr_fd[qid] << 32;
> > - if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid],
> &ev)
> > - < 0) {
> > - DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
> > - return NULL;
> > + if (internal->device_type == IFCVF_NET) {
> > + for (qid = 0; qid < q_num; qid += 2) {
> > + ev.events = EPOLLIN | EPOLLPRI;
> > + /* leave a flag to mark it's for interrupt */
> > + ev.data.u64 = 1 | qid << 1 |
> > + (uint64_t)internal->intr_fd[qid] << 32;
> > + if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > + internal->intr_fd[qid], &ev)
> > + < 0) {
> > + DRV_LOG(ERR, "epoll add error: %s",
> > + strerror(errno));
> > + return NULL;
> > + }
> > + update_used_ring(internal, qid);
> > + }
> > + } else if (internal->device_type == IFCVF_BLK) {
> > + for (qid = 0; qid < q_num; qid += 1) {
> > + ev.events = EPOLLIN | EPOLLPRI;
> > + /* leave a flag to mark it's for interrupt */
> > + ev.data.u64 = 1 | qid << 1 |
> > + (uint64_t)internal->intr_fd[qid] << 32;
> > + if (epoll_ctl(epfd, EPOLL_CTL_ADD,
> > + internal->intr_fd[qid], &ev)
> > + < 0) {
> > + DRV_LOG(ERR, "epoll add error: %s",
> > + strerror(errno));
> > + return NULL;
> > + }
> > + update_used_ring(internal, qid);
>
> It seems we can also reduce duplicate code for above case. And for other
> checks, if we can use only one combined condition to check, I prefer to just
> use one.
>
OK, I will address it in the next version.
> Thanks,
> Chenbo
>
> > }
> > - update_used_ring(internal, qid);
> > }
> >
> > /* start relay with a first kick */
> > @@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
> >
> > /* stop the direct IO data path */
> > unset_notify_relay(internal);
> > - vdpa_ifcvf_stop(internal);
> > + if (internal->device_type == IFCVF_NET)
> > + vdpa_ifcvf_stop(internal);
> > + else if (internal->device_type == IFCVF_BLK)
> > + vdpa_ifcvf_blk_pause(internal);
> > vdpa_disable_vfio_intr(internal);
> >
> > ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL,
> false);
> > --
> > 1.8.3.1
>
@@ -191,7 +191,7 @@
IFCVF_WRITE_REG32(val >> 32, hi);
}
-STATIC int
+int
ifcvf_hw_enable(struct ifcvf_hw *hw)
{
struct ifcvf_pci_common_cfg *cfg;
@@ -240,7 +240,7 @@
return 0;
}
-STATIC void
+void
ifcvf_hw_disable(struct ifcvf_hw *hw)
{
u32 i;
@@ -179,4 +179,10 @@ struct ifcvf_hw {
u64
ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
#endif /* _IFCVF_H_ */
@@ -345,6 +345,56 @@ struct rte_vdpa_dev_info {
}
}
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+ struct ifcvf_hw *hw = &internal->hw;
+ struct rte_vhost_vring vq;
+ int i, vid;
+ uint64_t features = 0;
+ uint64_t log_base = 0, log_size = 0;
+ uint64_t len;
+
+ vid = internal->vid;
+
+ if (internal->device_type == IFCVF_BLK) {
+ for (i = 0; i < hw->nr_vring; i++) {
+ rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+ while (vq.avail->idx != vq.used->idx) {
+ ifcvf_notify_queue(hw, i);
+ usleep(10);
+ }
+ hw->vring[i].last_avail_idx = vq.avail->idx;
+ hw->vring[i].last_used_idx = vq.used->idx;
+ }
+ }
+
+ ifcvf_hw_disable(hw);
+
+ for (i = 0; i < hw->nr_vring; i++)
+ rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+ hw->vring[i].last_used_idx);
+
+ if (internal->sw_lm)
+ return;
+
+ rte_vhost_get_negotiated_features(vid, &features);
+ if (RTE_VHOST_NEED_LOG(features)) {
+ ifcvf_disable_logging(hw);
+ rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+ rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+ log_base, IFCVF_LOG_BASE, log_size);
+ /*
+ * IFCVF marks dirty memory pages for only packet buffer,
+ * SW helps to mark the used ring as dirty after device stops.
+ */
+ for (i = 0; i < hw->nr_vring; i++) {
+ len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+ rte_vhost_log_used_vring(vid, i, 0, len);
+ }
+ }
+}
+
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
static int
@@ -659,15 +709,22 @@ struct rte_vdpa_dev_info {
}
hw->vring[i].avail = gpa;
- /* Direct I/O for Tx queue, relay for Rx queue */
- if (i & 1) {
- gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
- if (gpa == 0) {
- DRV_LOG(ERR, "Fail to get GPA for used ring.");
- return -1;
+ if (internal->device_type == IFCVF_NET) {
+ /* Direct I/O for Tx queue, relay for Rx queue */
+ if (i & 1) {
+ gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+ if (gpa == 0) {
+ DRV_LOG(ERR, "Fail to get GPA for used ring.");
+ return -1;
+ }
+ hw->vring[i].used = gpa;
+ } else {
+ hw->vring[i].used = m_vring_iova +
+ (char *)internal->m_vring[i].used -
+ (char *)internal->m_vring[i].desc;
}
- hw->vring[i].used = gpa;
- } else {
+ } else if (internal->device_type == IFCVF_BLK) {
+ /* BLK: relay every queue */
hw->vring[i].used = m_vring_iova +
(char *)internal->m_vring[i].used -
(char *)internal->m_vring[i].desc;
@@ -686,7 +743,10 @@ struct rte_vdpa_dev_info {
}
hw->nr_vring = nr_vring;
- return ifcvf_start_hw(&internal->hw);
+ if (internal->device_type == IFCVF_NET)
+ return ifcvf_start_hw(&internal->hw);
+ else if (internal->device_type == IFCVF_BLK)
+ return ifcvf_hw_enable(&internal->hw);
error:
for (i = 0; i < nr_vring; i++)
@@ -710,8 +770,12 @@ struct rte_vdpa_dev_info {
for (i = 0; i < hw->nr_vring; i++) {
/* synchronize remaining new used entries if any */
- if ((i & 1) == 0)
+ if (internal->device_type == IFCVF_NET) {
+ if ((i & 1) == 0)
+ update_used_ring(internal, i);
+ } else if (internal->device_type == IFCVF_BLK) {
update_used_ring(internal, i);
+ }
rte_vhost_get_vhost_vring(vid, i, &vq);
len = IFCVF_USED_RING_LEN(vq.size);
@@ -773,17 +837,36 @@ struct rte_vdpa_dev_info {
}
}
- for (qid = 0; qid < q_num; qid += 2) {
- ev.events = EPOLLIN | EPOLLPRI;
- /* leave a flag to mark it's for interrupt */
- ev.data.u64 = 1 | qid << 1 |
- (uint64_t)internal->intr_fd[qid] << 32;
- if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
- < 0) {
- DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
- return NULL;
+ if (internal->device_type == IFCVF_NET) {
+ for (qid = 0; qid < q_num; qid += 2) {
+ ev.events = EPOLLIN | EPOLLPRI;
+ /* leave a flag to mark it's for interrupt */
+ ev.data.u64 = 1 | qid << 1 |
+ (uint64_t)internal->intr_fd[qid] << 32;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+ internal->intr_fd[qid], &ev)
+ < 0) {
+ DRV_LOG(ERR, "epoll add error: %s",
+ strerror(errno));
+ return NULL;
+ }
+ update_used_ring(internal, qid);
+ }
+ } else if (internal->device_type == IFCVF_BLK) {
+ for (qid = 0; qid < q_num; qid += 1) {
+ ev.events = EPOLLIN | EPOLLPRI;
+ /* leave a flag to mark it's for interrupt */
+ ev.data.u64 = 1 | qid << 1 |
+ (uint64_t)internal->intr_fd[qid] << 32;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD,
+ internal->intr_fd[qid], &ev)
+ < 0) {
+ DRV_LOG(ERR, "epoll add error: %s",
+ strerror(errno));
+ return NULL;
+ }
+ update_used_ring(internal, qid);
}
- update_used_ring(internal, qid);
}
/* start relay with a first kick */
@@ -871,7 +954,10 @@ struct rte_vdpa_dev_info {
/* stop the direct IO data path */
unset_notify_relay(internal);
- vdpa_ifcvf_stop(internal);
+ if (internal->device_type == IFCVF_NET)
+ vdpa_ifcvf_stop(internal);
+ else if (internal->device_type == IFCVF_BLK)
+ vdpa_ifcvf_blk_pause(internal);
vdpa_disable_vfio_intr(internal);
ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);