[2/3] vhost: fix virtqueue access lock in datapath

Message ID 20231023095520.2864868-2-david.marchand@redhat.com (mailing list archive)
State Superseded
Delegated to: Maxime Coquelin
Headers
Series [1/3] vhost: robustify virtqueue access lock asserts |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

David Marchand Oct. 23, 2023, 9:55 a.m. UTC
  Now that a r/w lock is used, the access_ok field should only be updated
under a write lock.

Since the datapath code only takes a read lock on the virtqueue to check
access_ok, this lock must be released and a write lock taken before
calling vring_translate().

Fixes: 03f77d66d966 ("vhost: change virtqueue access lock to a read/write one")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/vhost/virtio_net.c | 60 +++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 16 deletions(-)
  

Comments

Eelco Chaudron Oct. 27, 2023, 9:03 a.m. UTC | #1
On 23 Oct 2023, at 11:55, David Marchand wrote:

> Now that a r/w lock is used, the access_ok field should only be updated
> under a write lock.
>
> Since the datapath code only takes a read lock on the virtqueue to check
> access_ok, this lock must be released and a write lock taken before
> calling vring_translate().
>
> Fixes: 03f77d66d966 ("vhost: change virtqueue access lock to a read/write one")
> Cc: stable@dpdk.org
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Only one question, but whatever the outcome, the change looks good to me.

Acked-by: Eelco Chaudron <echaudro@redhat.com>

> ---
>  lib/vhost/virtio_net.c | 60 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 759a78e3e3..4116f79d4f 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -1694,6 +1694,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
>  	return pkt_idx;
>  }
>
> +static void
> +virtio_dev_vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{

Would it be an idea to annotate this function to state that it must be called with the “read locks” held (and that it will release them), to avoid duplicating:

+		vhost_user_iotlb_rd_unlock(vq);
+		rte_rwlock_read_unlock(&vq->access_lock);

> +	rte_rwlock_write_lock(&vq->access_lock);
> +	vhost_user_iotlb_rd_lock(vq);
> +	if (!vq->access_ok)
> +		vring_translate(dev, vq);
> +	vhost_user_iotlb_rd_unlock(vq);
> +	rte_rwlock_write_unlock(&vq->access_lock);
> +}
> +
>  static __rte_always_inline uint32_t
>  virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  	struct rte_mbuf **pkts, uint32_t count)
> @@ -1708,9 +1719,13 @@ virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
>  	vhost_user_iotlb_rd_lock(vq);
>
> -	if (unlikely(!vq->access_ok))
> -		if (unlikely(vring_translate(dev, vq) < 0))
> -			goto out;
> +	if (unlikely(!vq->access_ok)) {
> +		vhost_user_iotlb_rd_unlock(vq);
> +		rte_rwlock_read_unlock(&vq->access_lock);
> +
> +		virtio_dev_vring_translate(dev, vq);
> +		goto out_no_unlock;
> +	}
>
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  	if (count == 0)
> @@ -1729,6 +1744,7 @@ virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  out_access_unlock:
>  	rte_rwlock_read_unlock(&vq->access_lock);
>
> +out_no_unlock:
>  	return nb_tx;
>  }
>
> @@ -2523,9 +2539,13 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
>  	vhost_user_iotlb_rd_lock(vq);
>
> -	if (unlikely(!vq->access_ok))
> -		if (unlikely(vring_translate(dev, vq) < 0))
> -			goto out;
> +	if (unlikely(!vq->access_ok)) {
> +		vhost_user_iotlb_rd_unlock(vq);
> +		rte_rwlock_read_unlock(&vq->access_lock);
> +
> +		virtio_dev_vring_translate(dev, vq);
> +		goto out_no_unlock;
> +	}
>
>  	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
>  	if (count == 0)
> @@ -2546,6 +2566,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
>  out_access_unlock:
>  	rte_rwlock_write_unlock(&vq->access_lock);
>
> +out_no_unlock:
>  	return nb_tx;
>  }
>
> @@ -3576,11 +3597,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>
>  	vhost_user_iotlb_rd_lock(vq);
>
> -	if (unlikely(!vq->access_ok))
> -		if (unlikely(vring_translate(dev, vq) < 0)) {
> -			count = 0;
> -			goto out;
> -		}
> +	if (unlikely(!vq->access_ok)) {
> +		vhost_user_iotlb_rd_unlock(vq);
> +		rte_rwlock_read_unlock(&vq->access_lock);
> +
> +		virtio_dev_vring_translate(dev, vq);
> +		goto out_no_unlock;
> +	}
>
>  	/*
>  	 * Construct a RARP broadcast packet, and inject it to the "pkts"
> @@ -3641,6 +3664,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>  	if (unlikely(rarp_mbuf != NULL))
>  		count += 1;
>
> +out_no_unlock:
>  	return count;
>  }
>
> @@ -4190,11 +4214,14 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
>
>  	vhost_user_iotlb_rd_lock(vq);
>
> -	if (unlikely(vq->access_ok == 0))
> -		if (unlikely(vring_translate(dev, vq) < 0)) {
> -			count = 0;
> -			goto out;
> -		}
> +	if (unlikely(vq->access_ok == 0)) {
> +		vhost_user_iotlb_rd_unlock(vq);
> +		rte_rwlock_read_unlock(&vq->access_lock);
> +
> +		virtio_dev_vring_translate(dev, vq);
> +		count = 0;
> +		goto out_no_unlock;
> +	}
>
>  	/*
>  	 * Construct a RARP broadcast packet, and inject it to the "pkts"
> @@ -4260,5 +4287,6 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
>  	if (unlikely(rarp_mbuf != NULL))
>  		count += 1;
>
> +out_no_unlock:
>  	return count;
>  }
> -- 
> 2.41.0
  
David Marchand Oct. 27, 2023, 9:22 a.m. UTC | #2
On Fri, Oct 27, 2023 at 11:05 AM Eelco Chaudron <echaudro@redhat.com> wrote:
> > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> > index 759a78e3e3..4116f79d4f 100644
> > --- a/lib/vhost/virtio_net.c
> > +++ b/lib/vhost/virtio_net.c
> > @@ -1694,6 +1694,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
> >       return pkt_idx;
> >  }
> >
> > +static void
> > +virtio_dev_vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
> > +{
>
> Would it be an idea to annotate this function that it needs to be called with the “read locks” (and that it will free them) to avoid the duplicate:
>
> +               vhost_user_iotlb_rd_unlock(vq);
> +               rte_rwlock_read_unlock(&vq->access_lock);

The "unlock" annotations do not express read/write concerns for locks.
So that would make the code less readable and potentially hide some issues.

I prefer to keep as is, with clear calls to rd_lock / rd_unlock in
those functions.

>
> > +     rte_rwlock_write_lock(&vq->access_lock);
> > +     vhost_user_iotlb_rd_lock(vq);
> > +     if (!vq->access_ok)
> > +             vring_translate(dev, vq);
> > +     vhost_user_iotlb_rd_unlock(vq);
> > +     rte_rwlock_write_unlock(&vq->access_lock);
> > +}
> > +
  
Eelco Chaudron Oct. 27, 2023, 10:11 a.m. UTC | #3
On 27 Oct 2023, at 11:22, David Marchand wrote:

> On Fri, Oct 27, 2023 at 11:05 AM Eelco Chaudron <echaudro@redhat.com> wrote:
>>> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
>>> index 759a78e3e3..4116f79d4f 100644
>>> --- a/lib/vhost/virtio_net.c
>>> +++ b/lib/vhost/virtio_net.c
>>> @@ -1694,6 +1694,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
>>>       return pkt_idx;
>>>  }
>>>
>>> +static void
>>> +virtio_dev_vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
>>> +{
>>
>> Would it be an idea to annotate this function that it needs to be called with the “read locks” (and that it will free them) to avoid the duplicate:
>>
>> +               vhost_user_iotlb_rd_unlock(vq);
>> +               rte_rwlock_read_unlock(&vq->access_lock);
>
> The "unlock" annotations do not express read/write concerns for locks.
> So that would make the code less readable and potentially hide some issues.
>
> I prefer to keep as is, with clear calls to rd_lock / rd_unlock in
> those functions.

ACK, keeping this as is is fine by me.

Acked-by: Eelco Chaudron <echaudro@redhat.com>

>>> +     rte_rwlock_write_lock(&vq->access_lock);
>>> +     vhost_user_iotlb_rd_lock(vq);
>>> +     if (!vq->access_ok)
>>> +             vring_translate(dev, vq);
>>> +     vhost_user_iotlb_rd_unlock(vq);
>>> +     rte_rwlock_write_unlock(&vq->access_lock);
>>> +}
>>> +
>
> -- 
> David Marchand
  
Maxime Coquelin Dec. 5, 2023, 9:10 a.m. UTC | #4
On 10/23/23 11:55, David Marchand wrote:
> Now that a r/w lock is used, the access_ok field should only be updated
> under a write lock.
> 
> Since the datapath code only takes a read lock on the virtqueue to check
> access_ok, this lock must be released and a write lock taken before
> calling vring_translate().
> 
> Fixes: 03f77d66d966 ("vhost: change virtqueue access lock to a read/write one")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>   lib/vhost/virtio_net.c | 60 +++++++++++++++++++++++++++++++-----------
>   1 file changed, 44 insertions(+), 16 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
  

Patch

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 759a78e3e3..4116f79d4f 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1694,6 +1694,17 @@  virtio_dev_rx_packed(struct virtio_net *dev,
 	return pkt_idx;
 }
 
+static void
+virtio_dev_vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	rte_rwlock_write_lock(&vq->access_lock);
+	vhost_user_iotlb_rd_lock(vq);
+	if (!vq->access_ok)
+		vring_translate(dev, vq);
+	vhost_user_iotlb_rd_unlock(vq);
+	rte_rwlock_write_unlock(&vq->access_lock);
+}
+
 static __rte_always_inline uint32_t
 virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct rte_mbuf **pkts, uint32_t count)
@@ -1708,9 +1719,13 @@  virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 	vhost_user_iotlb_rd_lock(vq);
 
-	if (unlikely(!vq->access_ok))
-		if (unlikely(vring_translate(dev, vq) < 0))
-			goto out;
+	if (unlikely(!vq->access_ok)) {
+		vhost_user_iotlb_rd_unlock(vq);
+		rte_rwlock_read_unlock(&vq->access_lock);
+
+		virtio_dev_vring_translate(dev, vq);
+		goto out_no_unlock;
+	}
 
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
@@ -1729,6 +1744,7 @@  virtio_dev_rx(struct virtio_net *dev, struct vhost_virtqueue *vq,
 out_access_unlock:
 	rte_rwlock_read_unlock(&vq->access_lock);
 
+out_no_unlock:
 	return nb_tx;
 }
 
@@ -2523,9 +2539,13 @@  virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 	vhost_user_iotlb_rd_lock(vq);
 
-	if (unlikely(!vq->access_ok))
-		if (unlikely(vring_translate(dev, vq) < 0))
-			goto out;
+	if (unlikely(!vq->access_ok)) {
+		vhost_user_iotlb_rd_unlock(vq);
+		rte_rwlock_read_unlock(&vq->access_lock);
+
+		virtio_dev_vring_translate(dev, vq);
+		goto out_no_unlock;
+	}
 
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
@@ -2546,6 +2566,7 @@  virtio_dev_rx_async_submit(struct virtio_net *dev, struct vhost_virtqueue *vq,
 out_access_unlock:
 	rte_rwlock_write_unlock(&vq->access_lock);
 
+out_no_unlock:
 	return nb_tx;
 }
 
@@ -3576,11 +3597,13 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	vhost_user_iotlb_rd_lock(vq);
 
-	if (unlikely(!vq->access_ok))
-		if (unlikely(vring_translate(dev, vq) < 0)) {
-			count = 0;
-			goto out;
-		}
+	if (unlikely(!vq->access_ok)) {
+		vhost_user_iotlb_rd_unlock(vq);
+		rte_rwlock_read_unlock(&vq->access_lock);
+
+		virtio_dev_vring_translate(dev, vq);
+		goto out_no_unlock;
+	}
 
 	/*
 	 * Construct a RARP broadcast packet, and inject it to the "pkts"
@@ -3641,6 +3664,7 @@  rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	if (unlikely(rarp_mbuf != NULL))
 		count += 1;
 
+out_no_unlock:
 	return count;
 }
 
@@ -4190,11 +4214,14 @@  rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
 
 	vhost_user_iotlb_rd_lock(vq);
 
-	if (unlikely(vq->access_ok == 0))
-		if (unlikely(vring_translate(dev, vq) < 0)) {
-			count = 0;
-			goto out;
-		}
+	if (unlikely(vq->access_ok == 0)) {
+		vhost_user_iotlb_rd_unlock(vq);
+		rte_rwlock_read_unlock(&vq->access_lock);
+
+		virtio_dev_vring_translate(dev, vq);
+		count = 0;
+		goto out_no_unlock;
+	}
 
 	/*
 	 * Construct a RARP broadcast packet, and inject it to the "pkts"
@@ -4260,5 +4287,6 @@  rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
 	if (unlikely(rarp_mbuf != NULL))
 		count += 1;
 
+out_no_unlock:
 	return count;
 }