[dpdk-dev,v3,17/19] vhost-user: iommu: postpone device creation until rings are mapped

Message ID 20171005083627.27828-18-maxime.coquelin@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Yuanhan Liu
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Maxime Coquelin Oct. 5, 2017, 8:36 a.m. UTC
  Translating the start addresses of the rings is not enough, we need to
be sure all the ring is made available by the guest.

It depends on the size of the rings, which is not known on SET_VRING_ADDR
reception. Furthermore, we need to be safe against vring pages
invalidates.

This patch introduces a new access_ok flag per virtqueue, which is set
when all the rings are mapped, and cleared as soon as a page used by a
ring is invalidated. The invalidation part is implemented in a following
patch.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
 lib/librte_vhost/vhost.h      |  2 ++
 lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++------------
 lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++----------------
 4 files changed, 121 insertions(+), 40 deletions(-)
  

Comments

Yao, Lei A Nov. 2, 2017, 7:21 a.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Maxime Coquelin
> Sent: Thursday, October 5, 2017 4:36 PM
> To: dev@dpdk.org; Horton, Remy <remy.horton@intel.com>; Bie, Tiwei
> <tiwei.bie@intel.com>; yliu@fridaylinux.org
> Cc: mst@redhat.com; jfreiman@redhat.com; vkaplans@redhat.com;
> jasowang@redhat.com; Maxime Coquelin <maxime.coquelin@redhat.com>
> Subject: [dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device
> creation until ring are mapped
> 
> Translating the start addresses of the rings is not enough, we need to
> be sure all the ring is made available by the guest.
> 
> It depends on the size of the rings, which is not known on SET_VRING_ADDR
> reception. Furthermore, we need to be be safe against vring pages
> invalidates.
> 
> This patch introduces a new access_ok flag per virtqueue, which is set
> when all the rings are mapped, and cleared as soon as a page used by a
> ring is invalidated. The invalidation part is implemented in a following
> patch.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.c      | 37 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 62 +++++++++++++++++++++++++++++++--
> ----------
>  lib/librte_vhost/virtio_net.c | 60 +++++++++++++++++++++++++-------------
> ---
>  4 files changed, 121 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 0e2ad3322..ef54835a6 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
>  	rte_free(dev);
>  }
> 
> +int
> +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
> +	uint64_t size;
> +
> +	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
> +		goto out;
> +
> +	size = sizeof(struct vring_desc) * vq->size;
> +	vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.desc_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->desc)
> +		return -1;
> +
> +	size = sizeof(struct vring_avail);
> +	size += sizeof(uint16_t) * vq->size;
> +	vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.avail_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->avail)
> +		return -1;
> +
> +	size = sizeof(struct vring_used);
> +	size += sizeof(struct vring_used_elem) * vq->size;
> +	vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
> +						vq-
> >ring_addrs.used_user_addr,
> +						size, VHOST_ACCESS_RW);
> +	if (!vq->used)
> +		return -1;
> +
> +out:
> +	vq->access_ok = 1;
> +
> +	return 0;
> +}
> +
>  static void
>  init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>  {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 903da5db5..b3fe6bb8e 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -113,6 +113,7 @@ struct vhost_virtqueue {
>  	/* Currently unused as polling mode is enabled */
>  	int			kickfd;
>  	int			enabled;
> +	int			access_ok;
> 
>  	/* Physical address of used ring, for logging */
>  	uint64_t		log_guest_addr;
> @@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
> 
>  uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct
> vhost_virtqueue *vq,
>  			uint64_t iova, uint64_t size, uint8_t perm);
> +int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
> 
>  static __rte_always_inline uint64_t
>  vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 90b209764..dd6562fd8 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev,
> VhostUserMsg *msg)
>  	 */
>  	memcpy(&vq->ring_addrs, addr, sizeof(*addr));
> 
> +	vq->desc = NULL;
> +	vq->avail = NULL;
> +	vq->used = NULL;
> +
> +	vq->access_ok = 0;
> +
>  	return 0;
>  }
> 
> @@ -407,10 +413,10 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
>  	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->desc_user_addr, sizeof(struct vring_desc));
>  	if (vq->desc == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find desc ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	dev = numa_realloc(dev, vq_index);
> @@ -419,19 +425,19 @@ static struct virtio_net
> *translate_ring_addresses(struct virtio_net *dev,
>  	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->avail_user_addr, sizeof(struct vring_avail));
>  	if (vq->avail == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find avail ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
>  			vq, addr->used_user_addr, sizeof(struct
> vring_used));
>  	if (vq->used == 0) {
> -		RTE_LOG(ERR, VHOST_CONFIG,
> +		RTE_LOG(DEBUG, VHOST_CONFIG,
>  			"(%d) failed to find used ring address.\n",
>  			dev->vid);
> -		return NULL;
> +		return dev;
>  	}
> 
>  	if (vq->last_used_idx != vq->used->idx) {
> @@ -677,7 +683,7 @@ vhost_user_set_mem_table(struct virtio_net *dev,
> struct VhostUserMsg *pmsg)
>  static int
>  vq_is_ready(struct vhost_virtqueue *vq)
>  {
> -	return vq && vq->desc   &&
> +	return vq && vq->desc && vq->avail && vq->used &&
>  	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
>  	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
>  }
> @@ -986,8 +992,29 @@ vhost_user_set_req_fd(struct virtio_net *dev,
> struct VhostUserMsg *msg)
>  }
> 
>  static int
> -vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
> +is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg
> *imsg)
>  {
> +	struct vhost_vring_addr *ra;
> +	uint64_t start, end;
> +
> +	start = imsg->iova;
> +	end = start + imsg->size;
> +
> +	ra = &vq->ring_addrs;
> +	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
> +		return 1;
> +	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
> +		return 1;
> +	if (ra->used_user_addr >= start && ra->used_user_addr < end)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int
> +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
> +{
> +	struct virtio_net *dev = *pdev;
>  	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
>  	uint16_t i;
>  	uint64_t vva;
> @@ -1003,6 +1030,9 @@ vhost_user_iotlb_msg(struct virtio_net *dev,
> struct VhostUserMsg *msg)
> 
>  			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
>  					imsg->size, imsg->perm);
> +
> +			if (is_vring_iotlb_update(vq, imsg))
> +				*pdev = dev = translate_ring_addresses(dev,
> i);
>  		}
>  		break;
>  	case VHOST_IOTLB_INVALIDATE:
> @@ -1151,8 +1181,12 @@ vhost_user_msg_handler(int vid, int fd)
>  	}
> 
>  	ret = 0;
> -	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> -		vhost_message_str[msg.request]);
> +	if (msg.request != VHOST_USER_IOTLB_MSG)
> +		RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
> +	else
> +		RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
> +			vhost_message_str[msg.request]);
> 
>  	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
>  	if (ret < 0) {
> @@ -1254,7 +1288,7 @@ vhost_user_msg_handler(int vid, int fd)
>  		break;
> 
>  	case VHOST_USER_IOTLB_MSG:
> -		ret = vhost_user_iotlb_msg(dev, &msg);
> +		ret = vhost_user_iotlb_msg(&dev, &msg);
>  		break;
> 
>  	default:
> @@ -1263,12 +1297,6 @@ vhost_user_msg_handler(int vid, int fd)
> 
>  	}
> 
> -	/*
> -	 * The virtio_net struct might have been reallocated on a different
> -	 * NUMA node, so dev pointer might no more be valid.
> -	 */
> -	dev = get_device(vid);
> -
>  	if (msg.flags & VHOST_USER_NEED_REPLY) {
>  		msg.payload.u64 = !!ret;
>  		msg.size = sizeof(msg.payload.u64);
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index cdfb6f957..b75c93cf1 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -329,13 +329,23 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0)) {
> +		if (unlikely(vring_translate(dev, vq) < 0)) {
> +			count = 0;
> +			goto out;
> +		}
> +	}
> +
>  	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
>  	start_idx = vq->last_used_idx;
>  	free_entries = avail_idx - start_idx;
>  	count = RTE_MIN(count, free_entries);
>  	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
>  	if (count == 0)
> -		return 0;
> +		goto out;
> 
>  	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
>  		dev->vid, start_idx, start_idx + count);
> @@ -356,10 +366,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	}
> 
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		uint16_t desc_idx = desc_indexes[i];
>  		int err;
> @@ -394,9 +400,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
> 
>  	do_data_copy_enqueue(dev, vq);
> 
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	rte_smp_wmb();
> 
>  	*(volatile uint16_t *)&vq->used->idx += count;
> @@ -412,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t
> queue_id,
>  	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>  			&& (vq->callfd >= 0))
>  		eventfd_write(vq->callfd, (eventfd_t)1);
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return count;
>  }
> 
> @@ -647,9 +654,16 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
>  	if (unlikely(vq->enabled == 0))
>  		return 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  	if (count == 0)
> -		return 0;
> +		goto out;
> 
>  	vq->batch_copy_nb_elems = 0;
> 
> @@ -657,10 +671,6 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
> 
>  	vq->shadow_used_idx = 0;
>  	avail_head = *((volatile uint16_t *)&vq->avail->idx);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
>  		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev-
> >vhost_hlen;
> 
> @@ -689,9 +699,6 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t
> queue_id,
> 
>  	do_data_copy_enqueue(dev, vq);
> 
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	if (likely(vq->shadow_used_idx)) {
>  		flush_shadow_used_ring(dev, vq);
> 
> @@ -704,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev,
> uint16_t queue_id,
>  			eventfd_write(vq->callfd, (eventfd_t)1);
>  	}
> 
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	return pkt_idx;
>  }
> 
> @@ -1173,6 +1184,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> 
>  	vq->batch_copy_nb_elems = 0;
> 
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0))
> +			goto out;
> +
>  	if (unlikely(dev->dequeue_zero_copy)) {
>  		struct zcopy_mbuf *zmbuf, *next;
>  		int nr_updated = 0;
> @@ -1262,10 +1280,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
> 
>  	/* Prefetch descriptor index. */
>  	rte_prefetch0(&vq->desc[desc_indexes[0]]);
> -
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_lock(vq);
> -
>  	for (i = 0; i < count; i++) {
>  		struct vring_desc *desc;
>  		uint16_t sz, idx;
> @@ -1329,9 +1343,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>  			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
>  		}
>  	}
> -	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> -		vhost_user_iotlb_rd_unlock(vq);
> -
>  	vq->last_avail_idx += i;
> 
>  	if (likely(dev->dequeue_zero_copy == 0)) {
> @@ -1341,6 +1352,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t
> queue_id,
>  	}
> 
>  out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
>  	if (unlikely(rarp_mbuf != NULL)) {
>  		/*
>  		 * Inject it to the head of "pkts" array, so that switch's mac
> --
> 2.13.6
Hi, Maxime

I met one issue with your patch set during the v17.11 test.
The test scenario is following, 
1.	Bind one NIC, use test-pmd set vhost-user with 2 queue
usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
--vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2 --txq=2 --nb-cores=2 --rss-ip
2.	Launch qemu with  virtio device which has 2 queue 
3.	In VM, launch testpmd with virtio-pmd using only 1 queue.
x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i --txqflags=0xf01 \
--rxq=1 --txq=1 --rss-ip --nb-cores=1

First, 
commit 09927b5249694bad1c094d3068124673722e6b8f
vhost: translate ring addresses when IOMMU enabled
The patch causes no traffic in PVP test. but link status is still up in vhost-user.

Second, 
eefac9536a901a1f0bb52aa3b6fec8f375f09190 
vhost: postpone device creation until rings are mapped
The patch causes link status "down" in vhost-user.

Could you have a check at your side? Thanks.

BRs
Lei
  
Maxime Coquelin Nov. 2, 2017, 8:21 a.m. UTC | #2
Hi Lei,

On 11/02/2017 08:21 AM, Yao, Lei A wrote:
> 
...
> Hi, Maxime > I met one issue with your patch set during the v17.11 test.

Is it with v17.11-rc2 or -rc1?

> The test scenario is following,
> 1.	Bind one NIC, use test-pmd set vhost-user with 2 queue
> usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
> ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 1024,1024 \
> --vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2 --txq=2 --nb-cores=2 --rss-ip
> 2.	Launch qemu with  virtio device which has 2 queue
> 3.	In VM, launch testpmd with virtio-pmd using only 1 queue.
> x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i --txqflags=0xf01 \
> --rxq=1 --txq=1 --rss-ip --nb-cores=1
> 
> First,
> commit 09927b5249694bad1c094d3068124673722e6b8f
> vhost: translate ring addresses when IOMMU enabled
> The patch causes no traffic in PVP test. but link status is still up in vhost-user.
> 
> Second,
> eefac9536a901a1f0bb52aa3b6fec8f375f09190
> vhost: postpone device creation until rings are mapped
> The patch causes link status "down" in vhost-user.
> 
> Could you have a check at your side? Thanks.

Sure, could you please on your side provide more info?
1. Host testpmd logs
2. Qemu version and cmdline

Thanks,
Maxime
> BRs
> Lei
>
  
Maxime Coquelin Nov. 2, 2017, 4:02 p.m. UTC | #3
On 11/02/2017 09:21 AM, Maxime Coquelin wrote:
> Hi Lei,
> 
> On 11/02/2017 08:21 AM, Yao, Lei A wrote:
>>
> ...
>> Hi, Maxime > I met one issue with your patch set during the v17.11 test.
> 
> Is it with v17.11-rc2 or -rc1?
> 
>> The test scenario is following,
>> 1.    Bind one NIC, use test-pmd set vhost-user with 2 queue
>> usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
>> ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 
>> 1024,1024 \
>> --vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2 --txq=2 
>> --nb-cores=2 --rss-ip
>> 2.    Launch qemu with  virtio device which has 2 queue
>> 3.    In VM, launch testpmd with virtio-pmd using only 1 queue.
>> x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i 
>> --txqflags=0xf01 \
>> --rxq=1 --txq=1 --rss-ip --nb-cores=1
>>
>> First,
>> commit 09927b5249694bad1c094d3068124673722e6b8f
>> vhost: translate ring addresses when IOMMU enabled
>> The patch causes no traffic in PVP test. but link status is still up 
>> in vhost-user.
>>
>> Second,
>> eefac9536a901a1f0bb52aa3b6fec8f375f09190
>> vhost: postpone device creation until rings are mapped
>> The patch causes link status "down" in vhost-user.

I reproduced this one, and understand why link status remains down.
My series did fix a potential issue Michael raised, that the vring
addresses should only be interpreted once the ring is enabled.
When VHOST_USER_F_PROTOCOL_FEATURES is negotiated, the rings addrs are
translated when ring is enabled via VHOST_USER_SET_VRING_ENABLE.
When not negotiated, the ring is considered started enabled, so
translation is done at VHOST_USER_SET_VRING_KICK time.

In your case, protocol features are negotiated, so the ring addresses
are translated at enable time. The problem is that the code considers
the device is ready once addresses for all the rings are translated.
But since only the first pair of rings is used, it never happens, and
the link remains down.

One of the reason this check is done is to avoid starting the PMD
threads before the addresses are translated in case of NUMA
reallocation, as virtqueues and virtio-net device structs can be
reallocated on a different node.

I think the right fix would be to only perform NUMA reallocation for
vring 0, as today we would end-up reallocating the virtio-net struct
multiple times if VQs are on different NUMA nodes.

Doing that, we could then consider the device is ready if vring 0 is
enabled and its ring addresses are translated, and if other vrings have
been kicked.

I'll post a patch shortly implementing this idea.

Thanks,
Maxime
  
Maxime Coquelin Nov. 3, 2017, 8:25 a.m. UTC | #4
On 11/02/2017 05:02 PM, Maxime Coquelin wrote:
> 
> 
> On 11/02/2017 09:21 AM, Maxime Coquelin wrote:
>> Hi Lei,
>>
>> On 11/02/2017 08:21 AM, Yao, Lei A wrote:
>>>
>> ...
>>> Hi, Maxime > I met one issue with your patch set during the v17.11 test.
>>
>> Is it with v17.11-rc2 or -rc1?
>>
>>> The test scenario is following,
>>> 1.    Bind one NIC, use test-pmd set vhost-user with 2 queue
>>> usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
>>> ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4 --socket-mem 
>>> 1024,1024 \
>>> --vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2 --txq=2 
>>> --nb-cores=2 --rss-ip
>>> 2.    Launch qemu with  virtio device which has 2 queue
>>> 3.    In VM, launch testpmd with virtio-pmd using only 1 queue.
>>> x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i 
>>> --txqflags=0xf01 \
>>> --rxq=1 --txq=1 --rss-ip --nb-cores=1
>>>
>>> First,
>>> commit 09927b5249694bad1c094d3068124673722e6b8f
>>> vhost: translate ring addresses when IOMMU enabled
>>> The patch causes no traffic in PVP test. but link status is still up 
>>> in vhost-user.
>>>
>>> Second,
>>> eefac9536a901a1f0bb52aa3b6fec8f375f09190
>>> vhost: postpone device creation until rings are mapped
>>> The patch causes link status "down" in vhost-user.
> 
> I reproduced this one, and understand why link status remains down.
> My series did fixed a potential issue Michael raised, that the vring
> addresses should only interpreted once the ring is enabled.
> When VHOST_USER_F_PROTOCOL_FEATURES is negotiated, the rings addrs are
> translated when ring is enabled via VHOST_USER_SET_VRING_ENABLE.
> When not negotiated, the ring is considered started enabled, so
> translation is done at VHOST_USER_SET_VRING_KICK time.
> 
> In your case, protocol features are negotiated, so the ring addresses
> are translated at enable time. The problem is that the code considers
> the device is ready once addresses for all the rings are translated.
> But since only the first pair of rings is used, it never happens, and
> the link remains down.
> 
> One of the reason this check is done is to avoid starting the PMD
> threads before the addresses are translated in case of NUMA
> reallocation, as virtqueues and virtio-net device structs can be
> reallocated on a different node.
> 
> I think the right fix would be to only perform NUMA reallocation for
> vring 0, as today we would end-up reallocating virtio-net struct
> mulitple time if VQs are on different NUMA nodes.
> 
> Doing that, we could then consider the device is ready if vring 0 is
> enabled and its ring addresses are translated, and if other vrings have
> been kicked.
> 
> I'll post a patch shortly implementing this idea.

The proposed solution doesn't work, because disabled queues get accessed 
at device start time:

int
rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
..
	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
	return 0;
}

The above function being called in Vhost PMD for every queues, enabled
or not. While we could fix the PMD, it could break other applications
using the Vhost lib API directly, so we cannot translate at enable
time reliably.

I think we may be a bit less conservative, and postpone addresses
translation at kick time, whatever VHOST_USER_F_PROTOCOL_FEATURES is
negotiated or not.

Regards,
Maxime

> Thanks,
> Maxime
  
Michael S. Tsirkin Nov. 3, 2017, 3:15 p.m. UTC | #5
On Fri, Nov 03, 2017 at 09:25:58AM +0100, Maxime Coquelin wrote:
> 
> 
> On 11/02/2017 05:02 PM, Maxime Coquelin wrote:
> > 
> > 
> > On 11/02/2017 09:21 AM, Maxime Coquelin wrote:
> > > Hi Lei,
> > > 
> > > On 11/02/2017 08:21 AM, Yao, Lei A wrote:
> > > > 
> > > ...
> > > > Hi, Maxime > I met one issue with your patch set during the v17.11 test.
> > > 
> > > Is it with v17.11-rc2 or -rc1?
> > > 
> > > > The test scenario is following,
> > > > 1.    Bind one NIC, use test-pmd set vhost-user with 2 queue
> > > > usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
> > > > ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4
> > > > --socket-mem 1024,1024 \
> > > > --vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2
> > > > --txq=2 --nb-cores=2 --rss-ip
> > > > 2.    Launch qemu with  virtio device which has 2 queue
> > > > 3.    In VM, launch testpmd with virtio-pmd using only 1 queue.
> > > > x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i
> > > > --txqflags=0xf01 \
> > > > --rxq=1 --txq=1 --rss-ip --nb-cores=1
> > > > 
> > > > First,
> > > > commit 09927b5249694bad1c094d3068124673722e6b8f
> > > > vhost: translate ring addresses when IOMMU enabled
> > > > The patch causes no traffic in PVP test. but link status is
> > > > still up in vhost-user.
> > > > 
> > > > Second,
> > > > eefac9536a901a1f0bb52aa3b6fec8f375f09190
> > > > vhost: postpone device creation until rings are mapped
> > > > The patch causes link status "down" in vhost-user.
> > 
> > I reproduced this one, and understand why link status remains down.
> > My series did fixed a potential issue Michael raised, that the vring
> > addresses should only interpreted once the ring is enabled.
> > When VHOST_USER_F_PROTOCOL_FEATURES is negotiated, the rings addrs are
> > translated when ring is enabled via VHOST_USER_SET_VRING_ENABLE.
> > When not negotiated, the ring is considered started enabled, so
> > translation is done at VHOST_USER_SET_VRING_KICK time.
> > 
> > In your case, protocol features are negotiated, so the ring addresses
> > are translated at enable time. The problem is that the code considers
> > the device is ready once addresses for all the rings are translated.
> > But since only the first pair of rings is used, it never happens, and
> > the link remains down.
> > 
> > One of the reason this check is done is to avoid starting the PMD
> > threads before the addresses are translated in case of NUMA
> > reallocation, as virtqueues and virtio-net device structs can be
> > reallocated on a different node.
> > 
> > I think the right fix would be to only perform NUMA reallocation for
> > vring 0, as today we would end-up reallocating virtio-net struct
> > mulitple time if VQs are on different NUMA nodes.
> > 
> > Doing that, we could then consider the device is ready if vring 0 is
> > enabled and its ring addresses are translated, and if other vrings have
> > been kicked.
> > 
> > I'll post a patch shortly implementing this idea.
> 
> The proposed solution doesn't work, because disabled queues get accessed at
> device start time:
> 
> int
> rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
> {
> ..
> 	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
> 	return 0;
> }
> 
> The above function being called in Vhost PMD for every queues, enabled
> or not. While we could fix the PMD, it could break other applications
> using the Vhost lib API directly, so we cannot translate at enable
> time reliably.
> 
> I think we may be a bit less conservative, and postpone addresses
> translation at kick time, whatever VHOST_USER_F_PROTOCOL_FEATURES is
> negotiated or not.
> 
> Regards,
> Maxime
> 
> > Thanks,
> > Maxime

I agree, enabling has nothing to do with it.

The spec is quite explicit:

Client must only process each ring when it is started.

and

Client must start ring upon receiving a kick (that is, detecting that file
descriptor is readable) on the descriptor specified by
VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
VHOST_USER_GET_VRING_BASE.
  
Maxime Coquelin Nov. 3, 2017, 3:54 p.m. UTC | #6
On 11/03/2017 04:15 PM, Michael S. Tsirkin wrote:
> On Fri, Nov 03, 2017 at 09:25:58AM +0100, Maxime Coquelin wrote:
>>
>>
>> On 11/02/2017 05:02 PM, Maxime Coquelin wrote:
>>>
>>>
>>> On 11/02/2017 09:21 AM, Maxime Coquelin wrote:
>>>> Hi Lei,
>>>>
>>>> On 11/02/2017 08:21 AM, Yao, Lei A wrote:
>>>>>
>>>> ...
>>>>> Hi, Maxime > I met one issue with your patch set during the v17.11 test.
>>>>
>>>> Is it with v17.11-rc2 or -rc1?
>>>>
>>>>> The test scenario is following,
>>>>> 1.    Bind one NIC, use test-pmd set vhost-user with 2 queue
>>>>> usertools/dpdk-devbind.py --bind=igb_uio 0000:05:00.0
>>>>> ./x86_64-native-linuxapp-gcc/app/testpmd -c 0xe -n 4
>>>>> --socket-mem 1024,1024 \
>>>>> --vdev 'net_vhost0,iface=vhost-net,queues=2' - -i --rxq=2
>>>>> --txq=2 --nb-cores=2 --rss-ip
>>>>> 2.    Launch qemu with  virtio device which has 2 queue
>>>>> 3.    In VM, launch testpmd with virtio-pmd using only 1 queue.
>>>>> x86_64-native-linuxapp-gcc/app/testpmd -c 0x07 -n 3 - -i
>>>>> --txqflags=0xf01 \
>>>>> --rxq=1 --txq=1 --rss-ip --nb-cores=1
>>>>>
>>>>> First,
>>>>> commit 09927b5249694bad1c094d3068124673722e6b8f
>>>>> vhost: translate ring addresses when IOMMU enabled
>>>>> The patch causes no traffic in PVP test. but link status is
>>>>> still up in vhost-user.
>>>>>
>>>>> Second,
>>>>> eefac9536a901a1f0bb52aa3b6fec8f375f09190
>>>>> vhost: postpone device creation until rings are mapped
>>>>> The patch causes link status "down" in vhost-user.
>>>
>>> I reproduced this one, and understand why link status remains down.
>>> My series did fixed a potential issue Michael raised, that the vring
>>> addresses should only interpreted once the ring is enabled.
>>> When VHOST_USER_F_PROTOCOL_FEATURES is negotiated, the rings addrs are
>>> translated when ring is enabled via VHOST_USER_SET_VRING_ENABLE.
>>> When not negotiated, the ring is considered started enabled, so
>>> translation is done at VHOST_USER_SET_VRING_KICK time.
>>>
>>> In your case, protocol features are negotiated, so the ring addresses
>>> are translated at enable time. The problem is that the code considers
>>> the device is ready once addresses for all the rings are translated.
>>> But since only the first pair of rings is used, it never happens, and
>>> the link remains down.
>>>
>>> One of the reason this check is done is to avoid starting the PMD
>>> threads before the addresses are translated in case of NUMA
>>> reallocation, as virtqueues and virtio-net device structs can be
>>> reallocated on a different node.
>>>
>>> I think the right fix would be to only perform NUMA reallocation for
>>> vring 0, as today we would end-up reallocating virtio-net struct
>>> mulitple time if VQs are on different NUMA nodes.
>>>
>>> Doing that, we could then consider the device is ready if vring 0 is
>>> enabled and its ring addresses are translated, and if other vrings have
>>> been kicked.
>>>
>>> I'll post a patch shortly implementing this idea.
>>
>> The proposed solution doesn't work, because disabled queues get accessed at
>> device start time:
>>
>> int
>> rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
>> {
>> ..
>> 	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
>> 	return 0;
>> }
>>
>> The above function being called in Vhost PMD for every queues, enabled
>> or not. While we could fix the PMD, it could break other applications
>> using the Vhost lib API directly, so we cannot translate at enable
>> time reliably.
>>
>> I think we may be a bit less conservative, and postpone addresses
>> translation at kick time, whatever VHOST_USER_F_PROTOCOL_FEATURES is
>> negotiated or not.
>>
>> Regards,
>> Maxime
>>
>>> Thanks,
>>> Maxime
> 
> I agree, enabling has nothing to do with it.
> 
> The spec is quite explicit:
> 
> Client must only process each ring when it is started.
> 
> and
> 
> Client must start ring upon receiving a kick (that is, detecting that file
> descriptor is readable) on the descriptor specified by
> VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
> VHOST_USER_GET_VRING_BASE.
> 

Thanks for the confirmation Michael, fix posted.

Maxime
  

Patch

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0e2ad3322..ef54835a6 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -135,6 +135,43 @@  free_device(struct virtio_net *dev)
 	rte_free(dev);
 }
 
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	uint64_t size;
+
+	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+		goto out;
+
+	size = sizeof(struct vring_desc) * vq->size;
+	vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.desc_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->desc)
+		return -1;
+
+	size = sizeof(struct vring_avail);
+	size += sizeof(uint16_t) * vq->size;
+	vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.avail_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->avail)
+		return -1;
+
+	size = sizeof(struct vring_used);
+	size += sizeof(struct vring_used_elem) * vq->size;
+	vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
+						vq->ring_addrs.used_user_addr,
+						size, VHOST_ACCESS_RW);
+	if (!vq->used)
+		return -1;
+
+out:
+	vq->access_ok = 1;
+
+	return 0;
+}
+
 static void
 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 903da5db5..b3fe6bb8e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -113,6 +113,7 @@  struct vhost_virtqueue {
 	/* Currently unused as polling mode is enabled */
 	int			kickfd;
 	int			enabled;
+	int			access_ok;
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
@@ -378,6 +379,7 @@  void vhost_backend_cleanup(struct virtio_net *dev);
 
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t iova, uint64_t size, uint8_t perm);
+int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 static __rte_always_inline uint64_t
 vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 90b209764..dd6562fd8 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -391,6 +391,12 @@  vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
 	 */
 	memcpy(&vq->ring_addrs, addr, sizeof(*addr));
 
+	vq->desc = NULL;
+	vq->avail = NULL;
+	vq->used = NULL;
+
+	vq->access_ok = 0;
+
 	return 0;
 }
 
@@ -407,10 +413,10 @@  static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
 	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->desc_user_addr, sizeof(struct vring_desc));
 	if (vq->desc == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find desc ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	dev = numa_realloc(dev, vq_index);
@@ -419,19 +425,19 @@  static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
 	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->avail_user_addr, sizeof(struct vring_avail));
 	if (vq->avail == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find avail ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
 			vq, addr->used_user_addr, sizeof(struct vring_used));
 	if (vq->used == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
+		RTE_LOG(DEBUG, VHOST_CONFIG,
 			"(%d) failed to find used ring address.\n",
 			dev->vid);
-		return NULL;
+		return dev;
 	}
 
 	if (vq->last_used_idx != vq->used->idx) {
@@ -677,7 +683,7 @@  vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 static int
 vq_is_ready(struct vhost_virtqueue *vq)
 {
-	return vq && vq->desc   &&
+	return vq && vq->desc && vq->avail && vq->used &&
 	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
 	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
 }
@@ -986,8 +992,29 @@  vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
 }
 
 static int
-vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
+is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
 {
+	struct vhost_vring_addr *ra;
+	uint64_t start, end;
+
+	start = imsg->iova;
+	end = start + imsg->size;
+
+	ra = &vq->ring_addrs;
+	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
+		return 1;
+	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
+		return 1;
+	if (ra->used_user_addr >= start && ra->used_user_addr < end)
+		return 1;
+
+	return 0;
+}
+
+static int
+vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
+{
+	struct virtio_net *dev = *pdev;
 	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
 	uint16_t i;
 	uint64_t vva;
@@ -1003,6 +1030,9 @@  vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
 
 			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
 					imsg->size, imsg->perm);
+
+			if (is_vring_iotlb_update(vq, imsg))
+				*pdev = dev = translate_ring_addresses(dev, i);
 		}
 		break;
 	case VHOST_IOTLB_INVALIDATE:
@@ -1151,8 +1181,12 @@  vhost_user_msg_handler(int vid, int fd)
 	}
 
 	ret = 0;
-	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
-		vhost_message_str[msg.request]);
+	if (msg.request != VHOST_USER_IOTLB_MSG)
+		RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+			vhost_message_str[msg.request]);
+	else
+		RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
+			vhost_message_str[msg.request]);
 
 	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
 	if (ret < 0) {
@@ -1254,7 +1288,7 @@  vhost_user_msg_handler(int vid, int fd)
 		break;
 
 	case VHOST_USER_IOTLB_MSG:
-		ret = vhost_user_iotlb_msg(dev, &msg);
+		ret = vhost_user_iotlb_msg(&dev, &msg);
 		break;
 
 	default:
@@ -1263,12 +1297,6 @@  vhost_user_msg_handler(int vid, int fd)
 
 	}
 
-	/*
-	 * The virtio_net struct might have been reallocated on a different
-	 * NUMA node, so dev pointer might no more be valid.
-	 */
-	dev = get_device(vid);
-
 	if (msg.flags & VHOST_USER_NEED_REPLY) {
 		msg.payload.u64 = !!ret;
 		msg.size = sizeof(msg.payload.u64);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index cdfb6f957..b75c93cf1 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -329,13 +329,23 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0)) {
+		if (unlikely(vring_translate(dev, vq) < 0)) {
+			count = 0;
+			goto out;
+		}
+	}
+
 	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 	start_idx = vq->last_used_idx;
 	free_entries = avail_idx - start_idx;
 	count = RTE_MIN(count, free_entries);
 	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
 	if (count == 0)
-		return 0;
+		goto out;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
 		dev->vid, start_idx, start_idx + count);
@@ -356,10 +366,6 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (i = 0; i < count; i++) {
 		uint16_t desc_idx = desc_indexes[i];
 		int err;
@@ -394,9 +400,6 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	do_data_copy_enqueue(dev, vq);
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	rte_smp_wmb();
 
 	*(volatile uint16_t *)&vq->used->idx += count;
@@ -412,6 +415,10 @@  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 			&& (vq->callfd >= 0))
 		eventfd_write(vq->callfd, (eventfd_t)1);
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	return count;
 }
 
@@ -647,9 +654,16 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
-		return 0;
+		goto out;
 
 	vq->batch_copy_nb_elems = 0;
 
@@ -657,10 +671,6 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	vq->shadow_used_idx = 0;
 	avail_head = *((volatile uint16_t *)&vq->avail->idx);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
@@ -689,9 +699,6 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	do_data_copy_enqueue(dev, vq);
 
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	if (likely(vq->shadow_used_idx)) {
 		flush_shadow_used_ring(dev, vq);
 
@@ -704,6 +711,10 @@  virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
+out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	return pkt_idx;
 }
 
@@ -1173,6 +1184,13 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	vq->batch_copy_nb_elems = 0;
 
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_lock(vq);
+
+	if (unlikely(vq->access_ok == 0))
+		if (unlikely(vring_translate(dev, vq) < 0))
+			goto out;
+
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
@@ -1262,10 +1280,6 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_lock(vq);
-
 	for (i = 0; i < count; i++) {
 		struct vring_desc *desc;
 		uint16_t sz, idx;
@@ -1329,9 +1343,6 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
 		}
 	}
-	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-		vhost_user_iotlb_rd_unlock(vq);
-
 	vq->last_avail_idx += i;
 
 	if (likely(dev->dequeue_zero_copy == 0)) {
@@ -1341,6 +1352,9 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 out:
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		vhost_user_iotlb_rd_unlock(vq);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac