[v3,5/5] vdpa/sfc: Add support for SW assisted live migration

Message ID 20220714134804.60495-6-asaini@xilinx.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Series Add support for live migration and cleanup MCDI headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/intel-Testing success Testing PASS

Commit Message

abhimanyu.saini@xilinx.com July 14, 2022, 1:48 p.m. UTC
  From: Abhimanyu Saini <absaini@amd.com>

In SW-assisted live migration, the vDPA driver will stop all
virtqueues and set up SW vrings to relay the communication between
the virtio driver and the vDPA device using an event-driven relay
thread.

This will allow the vDPA driver to assist with guest dirty page
logging for live migration.

Signed-off-by: Abhimanyu Saini <absaini@amd.com>
---
v2:
* Fix checkpatch warnings
* Add a cover letter
v3:
* Restructure patchset

 drivers/vdpa/sfc/sfc_vdpa.h     |   1 +
 drivers/vdpa/sfc/sfc_vdpa_ops.c | 337 +++++++++++++++++++++++++++++++++++++---
 drivers/vdpa/sfc/sfc_vdpa_ops.h |  15 +-
 3 files changed, 330 insertions(+), 23 deletions(-)
  

Comments

Andrew Rybchenko July 28, 2022, 1:42 p.m. UTC | #1
On 7/14/22 16:48, abhimanyu.saini@xilinx.com wrote:
> From: Abhimanyu Saini <absaini@amd.com>
> 
> In SW-assisted live migration, the vDPA driver will stop all
> virtqueues and set up SW vrings to relay the communication between
> the virtio driver and the vDPA device using an event-driven relay
> thread.
> 
> This will allow the vDPA driver to assist with guest dirty page
> logging for live migration.
> 
> Signed-off-by: Abhimanyu Saini <absaini@amd.com>
> ---
> v2:
> * Fix checkpatch warnings
> * Add a cover letter
> v3:
> * Restructure patchset
> 
>   drivers/vdpa/sfc/sfc_vdpa.h     |   1 +
>   drivers/vdpa/sfc/sfc_vdpa_ops.c | 337 +++++++++++++++++++++++++++++++++++++---
>   drivers/vdpa/sfc/sfc_vdpa_ops.h |  15 +-
>   3 files changed, 330 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
> index daeb27d..ae522ca 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa.h
> @@ -18,6 +18,7 @@
>   
>   #define SFC_VDPA_MAC_ADDR			"mac"
>   #define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
> +#define SFC_SW_VRING_IOVA			0x300000000000
>   
>   /* Broadcast & Unicast MAC filters are supported */
>   #define SFC_MAX_SUPPORTED_FILTERS		3
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> index 6401d4e..1d29ee7 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa_ops.c
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
> @@ -4,10 +4,13 @@
>   
>   #include <pthread.h>
>   #include <unistd.h>
> +#include <sys/epoll.h>
>   #include <sys/ioctl.h>
>   
> +#include <rte_eal_paging.h>
>   #include <rte_errno.h>
>   #include <rte_malloc.h>
> +#include <rte_memory.h>
>   #include <rte_vdpa.h>
>   #include <rte_vfio.h>
>   #include <rte_vhost.h>
> @@ -33,7 +36,9 @@
>    */
>   #define SFC_VDPA_DEFAULT_FEATURES \
>   		((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
> -		 (1ULL << VIRTIO_NET_F_MQ))
> +		 (1ULL << VIRTIO_NET_F_MQ) | \
> +		 (1ULL << VHOST_F_LOG_ALL) | \
> +		 (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE))
>   
>   #define SFC_VDPA_MSIX_IRQ_SET_BUF_LEN \
>   		(sizeof(struct vfio_irq_set) + \
> @@ -42,6 +47,142 @@
>   /* It will be used for target VF when calling function is not PF */
>   #define SFC_VDPA_VF_NULL		0xFFFF
>   
> +#define SFC_VDPA_DECODE_FD(data)	(data.u64 >> 32)

The macro argument should be enclosed in parentheses to be on the
safe side, taking the various operator priorities into account:
((data).u64 >> 32). Same below.
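
For example, a minimal sketch of the parenthesized variants (names as
in the patch, with only the parentheses around the argument added):

#define SFC_VDPA_DECODE_FD(data)	((data).u64 >> 32)
#define SFC_VDPA_DECODE_QID(data)	((data).u32 >> 1)
#define SFC_VDPA_DECODE_EV_TYPE(data)	((data).u32 & 1)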

> +#define SFC_VDPA_DECODE_QID(data)	(data.u32 >> 1)
> +#define SFC_VDPA_DECODE_EV_TYPE(data)	(data.u32 & 1)
> +
> +/*
> + * Create q_num number of epoll events for kickfd interrupts
> + * and q_num/2 events for callfd interrupts. Round up the
> + * total to (q_num * 2) number of events.
> + */
> +#define SFC_VDPA_SW_RELAY_EVENT_NUM(q_num)	(q_num * 2)

Enclose the macro argument in parentheses.
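
E.g. (untested):

#define SFC_VDPA_SW_RELAY_EVENT_NUM(q_num)	((q_num) * 2)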

> +
> +static inline uint64_t
> +sfc_vdpa_encode_ev_data(int type, uint32_t qid, int fd)
> +{
> +	SFC_VDPA_ASSERT(fd > UINT32_MAX || qid > UINT32_MAX / 2);

fd is a signed integer, but it is compared against a define with an
unsigned value. Don't we need to ensure that fd is really non-negative?
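
For example, a sketch of the kind of check meant here, assuming
SFC_VDPA_ASSERT() asserts that its condition holds:

static inline uint64_t
sfc_vdpa_encode_ev_data(int type, uint32_t qid, int fd)
{
	/* A negative fd would be sign-extended into the upper 32 bits */
	SFC_VDPA_ASSERT(fd >= 0 && qid <= UINT32_MAX / 2);
	return type | (qid << 1) | (uint64_t)fd << 32;
}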


> +	return type | (qid << 1) | (uint64_t)fd << 32;
> +}
> +
> +static inline void
> +sfc_vdpa_queue_relay(struct sfc_vdpa_ops_data *ops_data, uint32_t qid)
> +{
> +	rte_vdpa_relay_vring_used(ops_data->vid, qid, &ops_data->sw_vq[qid]);
> +	rte_vhost_vring_call(ops_data->vid, qid);
> +}
> +
> +static void*

missing space between void and *

> +sfc_vdpa_sw_relay(void *data)
> +{
> +	uint64_t buf;
> +	uint32_t qid, q_num;
> +	struct epoll_event ev;
> +	struct rte_vhost_vring vring;
> +	int nbytes, i, ret, fd, epfd, nfds = 0;

Such a long list of variables on one line is not readable.
It is better to move some of the variables into the loop bodies
below to make them block local.
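
E.g., in the first kickfd registration loop below, vring and ret
could be made block local (sketch only):

	for (qid = 0; qid < q_num; qid++) {
		struct rte_vhost_vring vring = { .kickfd = -1 };
		int ret;

		ev.events = EPOLLIN | EPOLLPRI;
		ret = rte_vhost_get_vhost_vring(ops_data->vid, qid, &vring);
		if (ret != 0) {
			sfc_vdpa_log_init(ops_data->dev_handle,
					  "rte_vhost_get_vhost_vring error %s",
					  strerror(errno));
			goto fail_vring;
		}

		ev.data.u64 = sfc_vdpa_encode_ev_data(0, qid, vring.kickfd);
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
			sfc_vdpa_log_init(ops_data->dev_handle,
					  "epoll add error: %s",
					  strerror(errno));
			goto fail_epoll_add;
		}
	}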

> +	struct epoll_event events[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
> +	struct sfc_vdpa_ops_data *ops_data = (struct sfc_vdpa_ops_data *)data;
> +
> +	q_num = rte_vhost_get_vring_num(ops_data->vid);
> +	epfd = epoll_create(SFC_VDPA_SW_RELAY_EVENT_NUM(q_num));
> +	if (epfd < 0) {
> +		sfc_vdpa_log_init(ops_data->dev_handle,
> +				  "failed to create epoll instance");
> +		goto fail_epoll;
> +	}
> +	ops_data->epfd = epfd;
> +
> +	vring.kickfd = -1;
> +	for (qid = 0; qid < q_num; qid++) {
> +		ev.events = EPOLLIN | EPOLLPRI;
> +		ret = rte_vhost_get_vhost_vring(ops_data->vid, qid, &vring);
> +		if (ret != 0) {
> +			sfc_vdpa_log_init(ops_data->dev_handle,
> +					  "rte_vhost_get_vhost_vring error %s",
> +					  strerror(errno));
> +			goto fail_vring;
> +		}
> +
> +		ev.data.u64 = sfc_vdpa_encode_ev_data(0, qid, vring.kickfd);
> +		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
> +			sfc_vdpa_log_init(ops_data->dev_handle,
> +					  "epoll add error: %s",
> +					  strerror(errno));
> +			goto fail_epoll_add;
> +		}
> +	}
> +
> +	/*
> +	 * Register intr_fd created by vDPA driver in lieu of qemu's callfd
> +	 * to intercept rx queue notification. So that we can monitor rx
> +	 * notifications and issue rte_vdpa_relay_vring_used()
> +	 */
> +	for (qid = 0; qid < q_num; qid += 2) {
> +		fd = ops_data->intr_fd[qid];
> +		ev.events = EPOLLIN | EPOLLPRI;
> +		ev.data.u64 = sfc_vdpa_encode_ev_data(1, qid, fd);
> +		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
> +			sfc_vdpa_log_init(ops_data->dev_handle,
> +					  "epoll add error: %s",
> +					  strerror(errno));
> +			goto fail_epoll_add;
> +		}
> +		sfc_vdpa_queue_relay(ops_data, qid);
> +	}
> +
> +	/*
> +	 * virtio driver in VM was continuously sending queue notifications
> +	 * while were setting up software vrings and hence the HW misses
> +	 * these doorbell notifications. Since, it is safe to send duplicate
> +	 * doorbell, send another doorbell from vDPA driver.
> +	 */
> +	for (qid = 0; qid < q_num; qid++)
> +		rte_write16(qid, ops_data->vq_cxt[qid].doorbell);
> +
> +	for (;;) {

Is it a forever loop? Why? If it is intended, it should be
highlighted in a comment.
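
E.g. a comment along these lines, given that sfc_vdpa_dev_close()
cancels the thread:

	/*
	 * Relay events forever; this thread has no exit condition of
	 * its own and is terminated via pthread_cancel() from
	 * sfc_vdpa_dev_close().
	 */
	for (;;) {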

> +		nfds = epoll_wait(epfd, events,
> +				  SFC_VDPA_SW_RELAY_EVENT_NUM(q_num), -1);
> +		if (nfds < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			sfc_vdpa_log_init(ops_data->dev_handle,
> +					  "epoll_wait return fail\n");
> +			goto fail_epoll_wait;
> +		}
> +
> +		for (i = 0; i < nfds; i++) {
> +			fd = SFC_VDPA_DECODE_FD(events[i].data);
> +			/* Ensure kickfd is not busy before proceeding */
> +			for (;;) {
> +				nbytes = read(fd, &buf, 8);
> +				if (nbytes < 0) {
> +					if (errno == EINTR ||
> +					    errno == EWOULDBLOCK ||
> +					    errno == EAGAIN)
> +						continue;
> +				}
> +				break;
> +			}

I think do { } while is a better construct above. It would be easier
to read and understand.
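
E.g. (untested, same semantics as the loop above):

			/* Drain the eventfd, retrying while the read
			 * is interrupted or would block.
			 */
			do {
				nbytes = read(fd, &buf, 8);
			} while (nbytes < 0 && (errno == EINTR ||
						errno == EWOULDBLOCK ||
						errno == EAGAIN));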

> +
> +			qid = SFC_VDPA_DECODE_QID(events[i].data);
> +			if (SFC_VDPA_DECODE_EV_TYPE(events[i].data))
> +				sfc_vdpa_queue_relay(ops_data, qid);
> +			else
> +				rte_write16(qid, ops_data->vq_cxt[qid].doorbell);
> +		}
> +	}
> +
> +	return NULL;
> +
> +fail_epoll:
> +fail_vring:
> +fail_epoll_add:
> +fail_epoll_wait:
> +	close(epfd);

Maybe it is better to avoid calling close() with a negative input parameter?
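
E.g. by reordering the labels so that the fail_epoll path never
reaches the close() (sketch only):

fail_epoll_wait:
fail_epoll_add:
fail_vring:
	close(epfd);
fail_epoll:
	ops_data->epfd = -1;
	return NULL;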

> +	ops_data->epfd = -1;
> +	return NULL;
> +}
> +
>   static int
>   sfc_vdpa_get_device_features(struct sfc_vdpa_ops_data *ops_data)
>   {
> @@ -99,7 +240,7 @@
>   static int
>   sfc_vdpa_enable_vfio_intr(struct sfc_vdpa_ops_data *ops_data)
>   {
> -	int rc;
> +	int rc, fd;
>   	int *irq_fd_ptr;
>   	int vfio_dev_fd;
>   	uint32_t i, num_vring;
> @@ -131,6 +272,17 @@
>   			return -1;
>   
>   		irq_fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
> +		if (ops_data->sw_fallback_mode && !(i & 1)) {

The (i & 1) condition should be wrapped in a tiny function to make
the meaning of the check clear.
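
E.g. a tiny helper (the name is only a suggestion), given that even
virtqueues are Rx and odd ones Tx:

/* Even virtqueue indices are Rx, odd are Tx */
static inline bool
sfc_vdpa_is_rx_vq(uint32_t vq_num)
{
	return (vq_num & 1) == 0;
}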

> +			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +			if (fd < 0) {
> +				sfc_vdpa_err(ops_data->dev_handle,
> +					     "failed to create eventfd");
> +				goto fail_eventfd;
> +			}
> +			ops_data->intr_fd[i] = fd;
> +			irq_fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
> +		} else
> +			ops_data->intr_fd[i] = -1;

Since the if body uses curly brackets, please use curly brackets in
the else body as well, regardless of the number of lines/statements.

>   	}
>   
>   	rc = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> @@ -138,16 +290,26 @@
>   		sfc_vdpa_err(ops_data->dev_handle,
>   			     "error enabling MSI-X interrupts: %s",
>   			     strerror(errno));
> -		return -1;
> +		goto fail_ioctl;
>   	}
>   
>   	return 0;
> +
> +fail_ioctl:
> +fail_eventfd:
> +	for (i = 0; i < num_vring; i++) {
> +		if (ops_data->intr_fd[i] != -1) {

Why do you use a different condition for a bad FD? Exactly -1 here,
but just negative (< 0) below. It is better to be consistent.
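
E.g. use the same check and reset in both places:

	if (ops_data->intr_fd[i] >= 0) {
		close(ops_data->intr_fd[i]);
		ops_data->intr_fd[i] = -1;
	}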

> +			close(ops_data->intr_fd[i]);
> +			ops_data->intr_fd[i] = -1;
> +		}
> +	}
> +	return -1;
>   }
>   
>   static int
>   sfc_vdpa_disable_vfio_intr(struct sfc_vdpa_ops_data *ops_data)
>   {
> -	int rc;
> +	int rc, i;
>   	int vfio_dev_fd;
>   	struct vfio_irq_set irq_set;
>   	void *dev;
> @@ -161,6 +323,12 @@
>   	irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX;
>   	irq_set.start = 0;
>   
> +	for (i = 0; i < ops_data->vq_count; i++) {
> +		if (ops_data->intr_fd[i] >= 0)

here (see above)

> +			close(ops_data->intr_fd[i]);
> +		ops_data->intr_fd[i] = -1;
> +	}
> +
>   	rc = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set);
>   	if (rc) {
>   		sfc_vdpa_err(ops_data->dev_handle,
> @@ -223,12 +391,15 @@
>   static int
>   sfc_vdpa_virtq_start(struct sfc_vdpa_ops_data *ops_data, int vq_num)
>   {
> -	int rc;
> +	int rc, fd;
> +	uint64_t size;
>   	uint32_t doorbell;
>   	efx_virtio_vq_t *vq;
> +	void *vring_buf, *dev;
>   	struct sfc_vdpa_vring_info vring;
>   	efx_virtio_vq_cfg_t vq_cfg;
>   	efx_virtio_vq_dyncfg_t vq_dyncfg;
> +	uint64_t sw_vq_iova = ops_data->sw_vq_iova;
>   
>   	vq = ops_data->vq_cxt[vq_num].vq;
>   	if (vq == NULL)
> @@ -241,6 +412,33 @@
>   		goto fail_vring_info;
>   	}
>   
> +	if (ops_data->sw_fallback_mode) {
> +		size = vring_size(vring.size, rte_mem_page_size());
> +		size = RTE_ALIGN_CEIL(size, rte_mem_page_size());
> +		vring_buf = rte_zmalloc("vdpa", size, rte_mem_page_size());
> +		vring_init(&ops_data->sw_vq[vq_num], vring.size, vring_buf,
> +			   rte_mem_page_size());
> +
> +		dev = ops_data->dev_handle;
> +		fd = sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;
> +		rc = rte_vfio_container_dma_map(fd,
> +						(uint64_t)(uintptr_t)vring_buf,
> +						sw_vq_iova, size);
> +
> +		/* Direct I/O for Tx queue, relay for Rx queue */
> +		if (!(vq_num & 1))

Use tiny function (see above).

> +			vring.used = sw_vq_iova +
> +				(char *)ops_data->sw_vq[vq_num].used -
> +				(char *)ops_data->sw_vq[vq_num].desc;
> +
> +		ops_data->sw_vq[vq_num].used->idx = vring.last_used_idx;
> +		ops_data->sw_vq[vq_num].avail->idx = vring.last_avail_idx;
> +
> +		ops_data->vq_cxt[vq_num].sw_vq_iova = sw_vq_iova;
> +		ops_data->vq_cxt[vq_num].sw_vq_size = size;
> +		ops_data->sw_vq_iova += size;
> +	}
> +
>   	vq_cfg.evvc_target_vf = SFC_VDPA_VF_NULL;
>   
>   	/* even virtqueue for RX and odd for TX */
> @@ -309,9 +507,12 @@
>   static int
>   sfc_vdpa_virtq_stop(struct sfc_vdpa_ops_data *ops_data, int vq_num)
>   {
> -	int rc;
> +	int rc, fd;
> +	void *dev, *buf;
> +	uint64_t size, len, iova;
>   	efx_virtio_vq_dyncfg_t vq_idx;
>   	efx_virtio_vq_t *vq;
> +	struct rte_vhost_vring vring;
>   
>   	if (ops_data->vq_cxt[vq_num].enable != B_TRUE)
>   		return -1;
> @@ -320,12 +521,34 @@
>   	if (vq == NULL)
>   		return -1;
>   
> +	if (ops_data->sw_fallback_mode) {
> +		dev = ops_data->dev_handle;
> +		fd = sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;
> +		/* synchronize remaining new used entries if any */
> +		if (!(vq_num & 1))

same here

> +			sfc_vdpa_queue_relay(ops_data, vq_num);
> +
> +		rte_vhost_get_vhost_vring(ops_data->vid, vq_num, &vring);
> +		len = SFC_VDPA_USED_RING_LEN(vring.size);
> +		rte_vhost_log_used_vring(ops_data->vid, vq_num, 0, len);
> +
> +		buf = ops_data->sw_vq[vq_num].desc;
> +		size = ops_data->vq_cxt[vq_num].sw_vq_size;
> +		iova = ops_data->vq_cxt[vq_num].sw_vq_iova;
> +		rte_vfio_container_dma_unmap(fd, (uint64_t)(uintptr_t)buf,
> +				iova, size);
> +	}
> +
>   	/* stop the vq */
>   	rc = efx_virtio_qstop(vq, &vq_idx);
>   	if (rc == 0) {
> -		ops_data->vq_cxt[vq_num].cidx = vq_idx.evvd_vq_used_idx;
> -		ops_data->vq_cxt[vq_num].pidx = vq_idx.evvd_vq_avail_idx;
> +		if (ops_data->sw_fallback_mode)
> +			vq_idx.evvd_vq_avail_idx = vq_idx.evvd_vq_used_idx;
> +		rte_vhost_set_vring_base(ops_data->vid, vq_num,
> +					 vq_idx.evvd_vq_avail_idx,
> +					 vq_idx.evvd_vq_used_idx);
>   	}
> +
>   	ops_data->vq_cxt[vq_num].enable = B_FALSE;
>   
>   	return rc;
> @@ -450,7 +673,11 @@
>   
>   	SFC_EFX_ASSERT(ops_data->state == SFC_VDPA_STATE_CONFIGURED);
>   
> -	sfc_vdpa_log_init(ops_data->dev_handle, "entry");
> +	if (ops_data->sw_fallback_mode) {
> +		sfc_vdpa_log_init(ops_data->dev_handle,
> +				  "Trying to start VDPA with SW I/O relay");
> +		ops_data->sw_vq_iova = SFC_SW_VRING_IOVA;
> +	}
>   
>   	ops_data->state = SFC_VDPA_STATE_STARTING;
>   
> @@ -675,6 +902,7 @@
>   sfc_vdpa_dev_close(int vid)
>   {
>   	int ret;
> +	void *status;
>   	struct rte_vdpa_device *vdpa_dev;
>   	struct sfc_vdpa_ops_data *ops_data;
>   
> @@ -707,7 +935,23 @@
>   	}
>   	ops_data->is_notify_thread_started = false;
>   
> +	if (ops_data->sw_fallback_mode) {
> +		ret = pthread_cancel(ops_data->sw_relay_thread_id);
> +		if (ret != 0)
> +			sfc_vdpa_err(ops_data->dev_handle,
> +				     "failed to cancel LM relay thread: %s",
> +				     rte_strerror(ret));
> +
> +		ret = pthread_join(ops_data->sw_relay_thread_id, &status);
> +		if (ret != 0)
> +			sfc_vdpa_err(ops_data->dev_handle,
> +				     "failed to join LM relay thread: %s",
> +				     rte_strerror(ret));
> +	}
> +
>   	sfc_vdpa_stop(ops_data);
> +	ops_data->sw_fallback_mode = false;
> +
>   	sfc_vdpa_close(ops_data);
>   
>   	sfc_vdpa_adapter_unlock(ops_data->dev_handle);
> @@ -774,9 +1018,49 @@
>   static int
>   sfc_vdpa_set_features(int vid)
>   {
> -	RTE_SET_USED(vid);
> +	int ret;
> +	uint64_t features = 0;
> +	struct rte_vdpa_device *vdpa_dev;
> +	struct sfc_vdpa_ops_data *ops_data;
>   
> -	return -1;
> +	vdpa_dev = rte_vhost_get_vdpa_device(vid);
> +	ops_data = sfc_vdpa_get_data_by_dev(vdpa_dev);
> +	if (ops_data == NULL)
> +		return -1;
> +
> +	rte_vhost_get_negotiated_features(vid, &features);
> +
> +	if (!RTE_VHOST_NEED_LOG(features))
> +		return -1;
> +
> +	sfc_vdpa_info(ops_data->dev_handle, "live-migration triggered");
> +
> +	sfc_vdpa_adapter_lock(ops_data->dev_handle);
> +
> +	/* Stop HW Offload and unset host notifier */
> +	sfc_vdpa_stop(ops_data);
> +	if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false) != 0)
> +		sfc_vdpa_info(ops_data->dev_handle,
> +			      "vDPA (%s): Failed to clear host notifier",
> +			      ops_data->vdpa_dev->device->name);
> +
> +	/* Restart vDPA with SW relay on RX queue */
> +	ops_data->sw_fallback_mode = true;
> +	sfc_vdpa_start(ops_data);
> +	ret = pthread_create(&ops_data->sw_relay_thread_id, NULL,
> +			     sfc_vdpa_sw_relay,	(void *)ops_data);
> +	if (ret != 0)
> +		sfc_vdpa_err(ops_data->dev_handle,
> +			     "failed to create rx_relay thread: %s",
> +			     rte_strerror(ret));
> +
> +	if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
> +		sfc_vdpa_info(ops_data->dev_handle, "notifier setup failed!");
> +
> +	sfc_vdpa_adapter_unlock(ops_data->dev_handle);
> +	sfc_vdpa_info(ops_data->dev_handle, "SW fallback setup done!");
> +
> +	return 0;
>   }
>   
>   static int
> @@ -860,17 +1144,28 @@
>   	sfc_vdpa_info(dev, "vDPA ops get_notify_area :: offset : 0x%" PRIx64,
>   		      *offset);
>   
> -	pci_dev = sfc_vdpa_adapter_by_dev_handle(dev)->pdev;
> -	doorbell = (uint8_t *)pci_dev->mem_resource[reg.index].addr + *offset;
> +	if (!ops_data->sw_fallback_mode) {
> +		pci_dev = sfc_vdpa_adapter_by_dev_handle(dev)->pdev;
> +		doorbell = (uint8_t *)pci_dev->mem_resource[reg.index].addr +
> +			*offset;
> +		/*
> +		 * virtio-net driver in VM sends queue notifications before
> +		 * vDPA has a chance to setup the queues and notification area,
> +		 * and hence the HW misses these doorbell notifications.
> +		 * Since, it is safe to send duplicate doorbell, send another
> +		 * doorbell from vDPA driver as workaround for this timing issue
> +		 */
> +		rte_write16(qid, doorbell);
> +
> +		/*
> +		 * Update doorbell address, it will come in handy during
> +		 * live-migration.
> +		 */
> +		ops_data->vq_cxt[qid].doorbell = doorbell;
> +	}
>   
> -	/*
> -	 * virtio-net driver in VM sends queue notifications before
> -	 * vDPA has a chance to setup the queues and notification area,
> -	 * and hence the HW misses these doorbell notifications.
> -	 * Since, it is safe to send duplicate doorbell, send another
> -	 * doorbell from vDPA driver as workaround for this timing issue.
> -	 */
> -	rte_write16(qid, doorbell);
> +	sfc_vdpa_info(dev, "vDPA ops get_notify_area :: offset : 0x%" PRIx64,
> +		      *offset);
>   
>   	return 0;
>   }
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> index 5c8e352..dd301ba 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa_ops.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
> @@ -6,8 +6,11 @@
>   #define _SFC_VDPA_OPS_H
>   
>   #include <rte_vdpa.h>
> +#include <vdpa_driver.h>
>   
>   #define SFC_VDPA_MAX_QUEUE_PAIRS		8
> +#define SFC_VDPA_USED_RING_LEN(size) \
> +	((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3)
>   
>   enum sfc_vdpa_context {
>   	SFC_VDPA_AS_VF
> @@ -37,9 +40,10 @@ struct sfc_vdpa_vring_info {
>   typedef struct sfc_vdpa_vq_context_s {
>   	volatile void			*doorbell;
>   	uint8_t				enable;
> -	uint32_t			pidx;
> -	uint32_t			cidx;
>   	efx_virtio_vq_t			*vq;
> +
> +	uint64_t			sw_vq_iova;
> +	uint64_t			sw_vq_size;
>   } sfc_vdpa_vq_context_t;
>   
>   struct sfc_vdpa_ops_data {
> @@ -57,6 +61,13 @@ struct sfc_vdpa_ops_data {
>   
>   	uint16_t			vq_count;
>   	struct sfc_vdpa_vq_context_s	vq_cxt[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
> +
> +	int				epfd;
> +	uint64_t			sw_vq_iova;
> +	bool				sw_fallback_mode;
> +	pthread_t			sw_relay_thread_id;
> +	struct vring			sw_vq[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
> +	int				intr_fd[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
>   };
>   
>   struct sfc_vdpa_ops_data *
  

Patch

diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
index daeb27d..ae522ca 100644
--- a/drivers/vdpa/sfc/sfc_vdpa.h
+++ b/drivers/vdpa/sfc/sfc_vdpa.h
@@ -18,6 +18,7 @@ 
 
 #define SFC_VDPA_MAC_ADDR			"mac"
 #define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
+#define SFC_SW_VRING_IOVA			0x300000000000
 
 /* Broadcast & Unicast MAC filters are supported */
 #define SFC_MAX_SUPPORTED_FILTERS		3
diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.c b/drivers/vdpa/sfc/sfc_vdpa_ops.c
index 6401d4e..1d29ee7 100644
--- a/drivers/vdpa/sfc/sfc_vdpa_ops.c
+++ b/drivers/vdpa/sfc/sfc_vdpa_ops.c
@@ -4,10 +4,13 @@ 
 
 #include <pthread.h>
 #include <unistd.h>
+#include <sys/epoll.h>
 #include <sys/ioctl.h>
 
+#include <rte_eal_paging.h>
 #include <rte_errno.h>
 #include <rte_malloc.h>
+#include <rte_memory.h>
 #include <rte_vdpa.h>
 #include <rte_vfio.h>
 #include <rte_vhost.h>
@@ -33,7 +36,9 @@ 
  */
 #define SFC_VDPA_DEFAULT_FEATURES \
 		((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
-		 (1ULL << VIRTIO_NET_F_MQ))
+		 (1ULL << VIRTIO_NET_F_MQ) | \
+		 (1ULL << VHOST_F_LOG_ALL) | \
+		 (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE))
 
 #define SFC_VDPA_MSIX_IRQ_SET_BUF_LEN \
 		(sizeof(struct vfio_irq_set) + \
@@ -42,6 +47,142 @@ 
 /* It will be used for target VF when calling function is not PF */
 #define SFC_VDPA_VF_NULL		0xFFFF
 
+#define SFC_VDPA_DECODE_FD(data)	(data.u64 >> 32)
+#define SFC_VDPA_DECODE_QID(data)	(data.u32 >> 1)
+#define SFC_VDPA_DECODE_EV_TYPE(data)	(data.u32 & 1)
+
+/*
+ * Create q_num number of epoll events for kickfd interrupts
+ * and q_num/2 events for callfd interrupts. Round up the
+ * total to (q_num * 2) number of events.
+ */
+#define SFC_VDPA_SW_RELAY_EVENT_NUM(q_num)	(q_num * 2)
+
+static inline uint64_t
+sfc_vdpa_encode_ev_data(int type, uint32_t qid, int fd)
+{
+	SFC_VDPA_ASSERT(fd > UINT32_MAX || qid > UINT32_MAX / 2);
+	return type | (qid << 1) | (uint64_t)fd << 32;
+}
+
+static inline void
+sfc_vdpa_queue_relay(struct sfc_vdpa_ops_data *ops_data, uint32_t qid)
+{
+	rte_vdpa_relay_vring_used(ops_data->vid, qid, &ops_data->sw_vq[qid]);
+	rte_vhost_vring_call(ops_data->vid, qid);
+}
+
+static void*
+sfc_vdpa_sw_relay(void *data)
+{
+	uint64_t buf;
+	uint32_t qid, q_num;
+	struct epoll_event ev;
+	struct rte_vhost_vring vring;
+	int nbytes, i, ret, fd, epfd, nfds = 0;
+	struct epoll_event events[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
+	struct sfc_vdpa_ops_data *ops_data = (struct sfc_vdpa_ops_data *)data;
+
+	q_num = rte_vhost_get_vring_num(ops_data->vid);
+	epfd = epoll_create(SFC_VDPA_SW_RELAY_EVENT_NUM(q_num));
+	if (epfd < 0) {
+		sfc_vdpa_log_init(ops_data->dev_handle,
+				  "failed to create epoll instance");
+		goto fail_epoll;
+	}
+	ops_data->epfd = epfd;
+
+	vring.kickfd = -1;
+	for (qid = 0; qid < q_num; qid++) {
+		ev.events = EPOLLIN | EPOLLPRI;
+		ret = rte_vhost_get_vhost_vring(ops_data->vid, qid, &vring);
+		if (ret != 0) {
+			sfc_vdpa_log_init(ops_data->dev_handle,
+					  "rte_vhost_get_vhost_vring error %s",
+					  strerror(errno));
+			goto fail_vring;
+		}
+
+		ev.data.u64 = sfc_vdpa_encode_ev_data(0, qid, vring.kickfd);
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
+			sfc_vdpa_log_init(ops_data->dev_handle,
+					  "epoll add error: %s",
+					  strerror(errno));
+			goto fail_epoll_add;
+		}
+	}
+
+	/*
+	 * Register intr_fd created by vDPA driver in lieu of qemu's callfd
+	 * to intercept rx queue notification. So that we can monitor rx
+	 * notifications and issue rte_vdpa_relay_vring_used()
+	 */
+	for (qid = 0; qid < q_num; qid += 2) {
+		fd = ops_data->intr_fd[qid];
+		ev.events = EPOLLIN | EPOLLPRI;
+		ev.data.u64 = sfc_vdpa_encode_ev_data(1, qid, fd);
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+			sfc_vdpa_log_init(ops_data->dev_handle,
+					  "epoll add error: %s",
+					  strerror(errno));
+			goto fail_epoll_add;
+		}
+		sfc_vdpa_queue_relay(ops_data, qid);
+	}
+
+	/*
+	 * virtio driver in VM was continuously sending queue notifications
+	 * while were setting up software vrings and hence the HW misses
+	 * these doorbell notifications. Since, it is safe to send duplicate
+	 * doorbell, send another doorbell from vDPA driver.
+	 */
+	for (qid = 0; qid < q_num; qid++)
+		rte_write16(qid, ops_data->vq_cxt[qid].doorbell);
+
+	for (;;) {
+		nfds = epoll_wait(epfd, events,
+				  SFC_VDPA_SW_RELAY_EVENT_NUM(q_num), -1);
+		if (nfds < 0) {
+			if (errno == EINTR)
+				continue;
+			sfc_vdpa_log_init(ops_data->dev_handle,
+					  "epoll_wait return fail\n");
+			goto fail_epoll_wait;
+		}
+
+		for (i = 0; i < nfds; i++) {
+			fd = SFC_VDPA_DECODE_FD(events[i].data);
+			/* Ensure kickfd is not busy before proceeding */
+			for (;;) {
+				nbytes = read(fd, &buf, 8);
+				if (nbytes < 0) {
+					if (errno == EINTR ||
+					    errno == EWOULDBLOCK ||
+					    errno == EAGAIN)
+						continue;
+				}
+				break;
+			}
+
+			qid = SFC_VDPA_DECODE_QID(events[i].data);
+			if (SFC_VDPA_DECODE_EV_TYPE(events[i].data))
+				sfc_vdpa_queue_relay(ops_data, qid);
+			else
+				rte_write16(qid, ops_data->vq_cxt[qid].doorbell);
+		}
+	}
+
+	return NULL;
+
+fail_epoll:
+fail_vring:
+fail_epoll_add:
+fail_epoll_wait:
+	close(epfd);
+	ops_data->epfd = -1;
+	return NULL;
+}
+
 static int
 sfc_vdpa_get_device_features(struct sfc_vdpa_ops_data *ops_data)
 {
@@ -99,7 +240,7 @@ 
 static int
 sfc_vdpa_enable_vfio_intr(struct sfc_vdpa_ops_data *ops_data)
 {
-	int rc;
+	int rc, fd;
 	int *irq_fd_ptr;
 	int vfio_dev_fd;
 	uint32_t i, num_vring;
@@ -131,6 +272,17 @@ 
 			return -1;
 
 		irq_fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+		if (ops_data->sw_fallback_mode && !(i & 1)) {
+			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+			if (fd < 0) {
+				sfc_vdpa_err(ops_data->dev_handle,
+					     "failed to create eventfd");
+				goto fail_eventfd;
+			}
+			ops_data->intr_fd[i] = fd;
+			irq_fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+		} else
+			ops_data->intr_fd[i] = -1;
 	}
 
 	rc = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -138,16 +290,26 @@ 
 		sfc_vdpa_err(ops_data->dev_handle,
 			     "error enabling MSI-X interrupts: %s",
 			     strerror(errno));
-		return -1;
+		goto fail_ioctl;
 	}
 
 	return 0;
+
+fail_ioctl:
+fail_eventfd:
+	for (i = 0; i < num_vring; i++) {
+		if (ops_data->intr_fd[i] != -1) {
+			close(ops_data->intr_fd[i]);
+			ops_data->intr_fd[i] = -1;
+		}
+	}
+	return -1;
 }
 
 static int
 sfc_vdpa_disable_vfio_intr(struct sfc_vdpa_ops_data *ops_data)
 {
-	int rc;
+	int rc, i;
 	int vfio_dev_fd;
 	struct vfio_irq_set irq_set;
 	void *dev;
@@ -161,6 +323,12 @@ 
 	irq_set.index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set.start = 0;
 
+	for (i = 0; i < ops_data->vq_count; i++) {
+		if (ops_data->intr_fd[i] >= 0)
+			close(ops_data->intr_fd[i]);
+		ops_data->intr_fd[i] = -1;
+	}
+
 	rc = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set);
 	if (rc) {
 		sfc_vdpa_err(ops_data->dev_handle,
@@ -223,12 +391,15 @@ 
 static int
 sfc_vdpa_virtq_start(struct sfc_vdpa_ops_data *ops_data, int vq_num)
 {
-	int rc;
+	int rc, fd;
+	uint64_t size;
 	uint32_t doorbell;
 	efx_virtio_vq_t *vq;
+	void *vring_buf, *dev;
 	struct sfc_vdpa_vring_info vring;
 	efx_virtio_vq_cfg_t vq_cfg;
 	efx_virtio_vq_dyncfg_t vq_dyncfg;
+	uint64_t sw_vq_iova = ops_data->sw_vq_iova;
 
 	vq = ops_data->vq_cxt[vq_num].vq;
 	if (vq == NULL)
@@ -241,6 +412,33 @@ 
 		goto fail_vring_info;
 	}
 
+	if (ops_data->sw_fallback_mode) {
+		size = vring_size(vring.size, rte_mem_page_size());
+		size = RTE_ALIGN_CEIL(size, rte_mem_page_size());
+		vring_buf = rte_zmalloc("vdpa", size, rte_mem_page_size());
+		vring_init(&ops_data->sw_vq[vq_num], vring.size, vring_buf,
+			   rte_mem_page_size());
+
+		dev = ops_data->dev_handle;
+		fd = sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;
+		rc = rte_vfio_container_dma_map(fd,
+						(uint64_t)(uintptr_t)vring_buf,
+						sw_vq_iova, size);
+
+		/* Direct I/O for Tx queue, relay for Rx queue */
+		if (!(vq_num & 1))
+			vring.used = sw_vq_iova +
+				(char *)ops_data->sw_vq[vq_num].used -
+				(char *)ops_data->sw_vq[vq_num].desc;
+
+		ops_data->sw_vq[vq_num].used->idx = vring.last_used_idx;
+		ops_data->sw_vq[vq_num].avail->idx = vring.last_avail_idx;
+
+		ops_data->vq_cxt[vq_num].sw_vq_iova = sw_vq_iova;
+		ops_data->vq_cxt[vq_num].sw_vq_size = size;
+		ops_data->sw_vq_iova += size;
+	}
+
 	vq_cfg.evvc_target_vf = SFC_VDPA_VF_NULL;
 
 	/* even virtqueue for RX and odd for TX */
@@ -309,9 +507,12 @@ 
 static int
 sfc_vdpa_virtq_stop(struct sfc_vdpa_ops_data *ops_data, int vq_num)
 {
-	int rc;
+	int rc, fd;
+	void *dev, *buf;
+	uint64_t size, len, iova;
 	efx_virtio_vq_dyncfg_t vq_idx;
 	efx_virtio_vq_t *vq;
+	struct rte_vhost_vring vring;
 
 	if (ops_data->vq_cxt[vq_num].enable != B_TRUE)
 		return -1;
@@ -320,12 +521,34 @@ 
 	if (vq == NULL)
 		return -1;
 
+	if (ops_data->sw_fallback_mode) {
+		dev = ops_data->dev_handle;
+		fd = sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;
+		/* synchronize remaining new used entries if any */
+		if (!(vq_num & 1))
+			sfc_vdpa_queue_relay(ops_data, vq_num);
+
+		rte_vhost_get_vhost_vring(ops_data->vid, vq_num, &vring);
+		len = SFC_VDPA_USED_RING_LEN(vring.size);
+		rte_vhost_log_used_vring(ops_data->vid, vq_num, 0, len);
+
+		buf = ops_data->sw_vq[vq_num].desc;
+		size = ops_data->vq_cxt[vq_num].sw_vq_size;
+		iova = ops_data->vq_cxt[vq_num].sw_vq_iova;
+		rte_vfio_container_dma_unmap(fd, (uint64_t)(uintptr_t)buf,
+				iova, size);
+	}
+
 	/* stop the vq */
 	rc = efx_virtio_qstop(vq, &vq_idx);
 	if (rc == 0) {
-		ops_data->vq_cxt[vq_num].cidx = vq_idx.evvd_vq_used_idx;
-		ops_data->vq_cxt[vq_num].pidx = vq_idx.evvd_vq_avail_idx;
+		if (ops_data->sw_fallback_mode)
+			vq_idx.evvd_vq_avail_idx = vq_idx.evvd_vq_used_idx;
+		rte_vhost_set_vring_base(ops_data->vid, vq_num,
+					 vq_idx.evvd_vq_avail_idx,
+					 vq_idx.evvd_vq_used_idx);
 	}
+
 	ops_data->vq_cxt[vq_num].enable = B_FALSE;
 
 	return rc;
@@ -450,7 +673,11 @@ 
 
 	SFC_EFX_ASSERT(ops_data->state == SFC_VDPA_STATE_CONFIGURED);
 
-	sfc_vdpa_log_init(ops_data->dev_handle, "entry");
+	if (ops_data->sw_fallback_mode) {
+		sfc_vdpa_log_init(ops_data->dev_handle,
+				  "Trying to start VDPA with SW I/O relay");
+		ops_data->sw_vq_iova = SFC_SW_VRING_IOVA;
+	}
 
 	ops_data->state = SFC_VDPA_STATE_STARTING;
 
@@ -675,6 +902,7 @@ 
 sfc_vdpa_dev_close(int vid)
 {
 	int ret;
+	void *status;
 	struct rte_vdpa_device *vdpa_dev;
 	struct sfc_vdpa_ops_data *ops_data;
 
@@ -707,7 +935,23 @@ 
 	}
 	ops_data->is_notify_thread_started = false;
 
+	if (ops_data->sw_fallback_mode) {
+		ret = pthread_cancel(ops_data->sw_relay_thread_id);
+		if (ret != 0)
+			sfc_vdpa_err(ops_data->dev_handle,
+				     "failed to cancel LM relay thread: %s",
+				     rte_strerror(ret));
+
+		ret = pthread_join(ops_data->sw_relay_thread_id, &status);
+		if (ret != 0)
+			sfc_vdpa_err(ops_data->dev_handle,
+				     "failed to join LM relay thread: %s",
+				     rte_strerror(ret));
+	}
+
 	sfc_vdpa_stop(ops_data);
+	ops_data->sw_fallback_mode = false;
+
 	sfc_vdpa_close(ops_data);
 
 	sfc_vdpa_adapter_unlock(ops_data->dev_handle);
@@ -774,9 +1018,49 @@ 
 static int
 sfc_vdpa_set_features(int vid)
 {
-	RTE_SET_USED(vid);
+	int ret;
+	uint64_t features = 0;
+	struct rte_vdpa_device *vdpa_dev;
+	struct sfc_vdpa_ops_data *ops_data;
 
-	return -1;
+	vdpa_dev = rte_vhost_get_vdpa_device(vid);
+	ops_data = sfc_vdpa_get_data_by_dev(vdpa_dev);
+	if (ops_data == NULL)
+		return -1;
+
+	rte_vhost_get_negotiated_features(vid, &features);
+
+	if (!RTE_VHOST_NEED_LOG(features))
+		return -1;
+
+	sfc_vdpa_info(ops_data->dev_handle, "live-migration triggered");
+
+	sfc_vdpa_adapter_lock(ops_data->dev_handle);
+
+	/* Stop HW Offload and unset host notifier */
+	sfc_vdpa_stop(ops_data);
+	if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false) != 0)
+		sfc_vdpa_info(ops_data->dev_handle,
+			      "vDPA (%s): Failed to clear host notifier",
+			      ops_data->vdpa_dev->device->name);
+
+	/* Restart vDPA with SW relay on RX queue */
+	ops_data->sw_fallback_mode = true;
+	sfc_vdpa_start(ops_data);
+	ret = pthread_create(&ops_data->sw_relay_thread_id, NULL,
+			     sfc_vdpa_sw_relay,	(void *)ops_data);
+	if (ret != 0)
+		sfc_vdpa_err(ops_data->dev_handle,
+			     "failed to create rx_relay thread: %s",
+			     rte_strerror(ret));
+
+	if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
+		sfc_vdpa_info(ops_data->dev_handle, "notifier setup failed!");
+
+	sfc_vdpa_adapter_unlock(ops_data->dev_handle);
+	sfc_vdpa_info(ops_data->dev_handle, "SW fallback setup done!");
+
+	return 0;
 }
 
 static int
@@ -860,17 +1144,28 @@ 
 	sfc_vdpa_info(dev, "vDPA ops get_notify_area :: offset : 0x%" PRIx64,
 		      *offset);
 
-	pci_dev = sfc_vdpa_adapter_by_dev_handle(dev)->pdev;
-	doorbell = (uint8_t *)pci_dev->mem_resource[reg.index].addr + *offset;
+	if (!ops_data->sw_fallback_mode) {
+		pci_dev = sfc_vdpa_adapter_by_dev_handle(dev)->pdev;
+		doorbell = (uint8_t *)pci_dev->mem_resource[reg.index].addr +
+			*offset;
+		/*
+		 * virtio-net driver in VM sends queue notifications before
+		 * vDPA has a chance to setup the queues and notification area,
+		 * and hence the HW misses these doorbell notifications.
+		 * Since, it is safe to send duplicate doorbell, send another
+		 * doorbell from vDPA driver as workaround for this timing issue
+		 */
+		rte_write16(qid, doorbell);
+
+		/*
+		 * Update doorbell address, it will come in handy during
+		 * live-migration.
+		 */
+		ops_data->vq_cxt[qid].doorbell = doorbell;
+	}
 
-	/*
-	 * virtio-net driver in VM sends queue notifications before
-	 * vDPA has a chance to setup the queues and notification area,
-	 * and hence the HW misses these doorbell notifications.
-	 * Since, it is safe to send duplicate doorbell, send another
-	 * doorbell from vDPA driver as workaround for this timing issue.
-	 */
-	rte_write16(qid, doorbell);
+	sfc_vdpa_info(dev, "vDPA ops get_notify_area :: offset : 0x%" PRIx64,
+		      *offset);
 
 	return 0;
 }
diff --git a/drivers/vdpa/sfc/sfc_vdpa_ops.h b/drivers/vdpa/sfc/sfc_vdpa_ops.h
index 5c8e352..dd301ba 100644
--- a/drivers/vdpa/sfc/sfc_vdpa_ops.h
+++ b/drivers/vdpa/sfc/sfc_vdpa_ops.h
@@ -6,8 +6,11 @@ 
 #define _SFC_VDPA_OPS_H
 
 #include <rte_vdpa.h>
+#include <vdpa_driver.h>
 
 #define SFC_VDPA_MAX_QUEUE_PAIRS		8
+#define SFC_VDPA_USED_RING_LEN(size) \
+	((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3)
 
 enum sfc_vdpa_context {
 	SFC_VDPA_AS_VF
@@ -37,9 +40,10 @@  struct sfc_vdpa_vring_info {
 typedef struct sfc_vdpa_vq_context_s {
 	volatile void			*doorbell;
 	uint8_t				enable;
-	uint32_t			pidx;
-	uint32_t			cidx;
 	efx_virtio_vq_t			*vq;
+
+	uint64_t			sw_vq_iova;
+	uint64_t			sw_vq_size;
 } sfc_vdpa_vq_context_t;
 
 struct sfc_vdpa_ops_data {
@@ -57,6 +61,13 @@  struct sfc_vdpa_ops_data {
 
 	uint16_t			vq_count;
 	struct sfc_vdpa_vq_context_s	vq_cxt[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
+
+	int				epfd;
+	uint64_t			sw_vq_iova;
+	bool				sw_fallback_mode;
+	pthread_t			sw_relay_thread_id;
+	struct vring			sw_vq[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
+	int				intr_fd[SFC_VDPA_MAX_QUEUE_PAIRS * 2];
 };
 
 struct sfc_vdpa_ops_data *