[3/5] vhost: do not inline unlikely fragmented buffers code

Message ID 20190517122220.31283-4-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Headers
Series vhost: I-cache pressure optimizations |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Maxime Coquelin May 17, 2019, 12:22 p.m. UTC
  Handling of fragmented virtio-net header and indirect descriptors
tables was implemented to fix CVE-2018-1059. It should not never
happen with healthy guests and so are already considered as
unlikely code path.

This patch moves these bits into non-inline dedicated functions
to reduce the I-cache pressure.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.c      |  33 +++++++++++
 lib/librte_vhost/vhost.h      |  35 +-----------
 lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
 3 files changed, 91 insertions(+), 79 deletions(-)
  

Comments

Maxime Coquelin May 17, 2019, 12:57 p.m. UTC | #1
On 5/17/19 2:22 PM, Maxime Coquelin wrote:
> Handling of fragmented virtio-net header and indirect descriptors
> tables was implemented to fix CVE-2018-1059. It should not never
> happen with healthy guests and so are already considered as
> unlikely code path.
> 
> This patch moves these bits into non-inline dedicated functions
> to reduce the I-cache pressure.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>   lib/librte_vhost/vhost.c      |  33 +++++++++++
>   lib/librte_vhost/vhost.h      |  35 +-----------
>   lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>   3 files changed, 91 insertions(+), 79 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 4a54ad6bd1..8a4379bc13 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   }
>   
>   
> +void *
> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		uint64_t desc_addr, uint64_t desc_len)
> +{
> +	void *idesc;
> +	uint64_t src, dst;
> +	uint64_t len, remain = desc_len;
> +
> +	idesc = rte_malloc(__func__, desc_len, 0);
> +	if (unlikely(!idesc))
> +		return NULL;
> +
> +	dst = (uint64_t)(uintptr_t)idesc;
> +
> +	while (remain) {
> +		len = remain;
> +		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> +				VHOST_ACCESS_RO);
> +		if (unlikely(!src || !len)) {
> +			rte_free(idesc);
> +			return NULL;
> +		}
> +
> +		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> +
> +		remain -= len;
> +		dst += len;
> +		desc_addr += len;
> +	}
> +
> +	return idesc;
> +}
> +
>   void
>   cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>   {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3ab7b4950f..ab26454e1c 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>   
>   uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   			uint64_t iova, uint64_t *len, uint8_t perm);
> +void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +			uint64_t desc_addr, uint64_t desc_len);
>   int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>   void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>   
> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
>   		eventfd_write(vq->callfd, (eventfd_t)1);
>   }
>   
> -static __rte_always_inline void *
> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -		uint64_t desc_addr, uint64_t desc_len)
> -{
> -	void *idesc;
> -	uint64_t src, dst;
> -	uint64_t len, remain = desc_len;
> -
> -	idesc = rte_malloc(__func__, desc_len, 0);
> -	if (unlikely(!idesc))
> -		return 0;
> -
> -	dst = (uint64_t)(uintptr_t)idesc;
> -
> -	while (remain) {
> -		len = remain;
> -		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> -				VHOST_ACCESS_RO);
> -		if (unlikely(!src || !len)) {
> -			rte_free(idesc);
> -			return 0;
> -		}
> -
> -		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> -
> -		remain -= len;
> -		dst += len;
> -		desc_addr += len;
> -	}
> -
> -	return idesc;
> -}
> -
>   static __rte_always_inline void
>   free_ind_table(void *idesc)
>   {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 35ae4992c2..494dd9957e 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   	return 0;
>   }
>   
> +static void
> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		struct buf_vector *buf_vec,
> +		struct virtio_net_hdr_mrg_rxbuf *hdr){

I seem to have missed above open bracket coding style issue while
running checkpatch.

Will fix in next revision.

> +	uint64_t len;
> +	uint64_t remain = dev->vhost_hlen;
> +	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> +	uint64_t iova = buf_vec->buf_iova;
> +
> +	while (remain) {
> +		len = RTE_MIN(remain,
> +				buf_vec->buf_len);
> +		dst = buf_vec->buf_addr;
> +		rte_memcpy((void *)(uintptr_t)dst,
> +				(void *)(uintptr_t)src,
> +				len);
> +
> +		PRINT_PACKET(dev, (uintptr_t)dst,
> +				(uint32_t)len, 0);
> +		vhost_log_cache_write(dev, vq,
> +				iova, len);
> +
> +		remain -= len;
> +		iova += len;
> +		src += len;
> +		buf_vec++;
> +	}
> +}
> +
>   static __rte_always_inline int
>   copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   			    struct rte_mbuf *m, struct buf_vector *buf_vec,
> @@ -703,30 +732,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   						num_buffers);
>   
>   			if (unlikely(hdr == &tmp_hdr)) {
> -				uint64_t len;
> -				uint64_t remain = dev->vhost_hlen;
> -				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> -				uint64_t iova = buf_vec[0].buf_iova;
> -				uint16_t hdr_vec_idx = 0;
> -
> -				while (remain) {
> -					len = RTE_MIN(remain,
> -						buf_vec[hdr_vec_idx].buf_len);
> -					dst = buf_vec[hdr_vec_idx].buf_addr;
> -					rte_memcpy((void *)(uintptr_t)dst,
> -							(void *)(uintptr_t)src,
> -							len);
> -
> -					PRINT_PACKET(dev, (uintptr_t)dst,
> -							(uint32_t)len, 0);
> -					vhost_log_cache_write(dev, vq,
> -							iova, len);
> -
> -					remain -= len;
> -					iova += len;
> -					src += len;
> -					hdr_vec_idx++;
> -				}
> +				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
>   			} else {
>   				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
>   						dev->vhost_hlen, 0);
> @@ -1063,6 +1069,31 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
>   	}
>   }
>   
> +static void
> +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
> +		struct buf_vector *buf_vec)
> +{
> +	uint64_t len;
> +	uint64_t remain = sizeof(struct virtio_net_hdr);
> +	uint64_t src;
> +	uint64_t dst = (uint64_t)(uintptr_t)&hdr;
> +
> +	/*
> +	 * No luck, the virtio-net header doesn't fit
> +	 * in a contiguous virtual area.
> +	 */
> +	while (remain) {
> +		len = RTE_MIN(remain, buf_vec->buf_len);
> +		src = buf_vec->buf_addr;
> +		rte_memcpy((void *)(uintptr_t)dst,
> +				(void *)(uintptr_t)src, len);
> +
> +		remain -= len;
> +		dst += len;
> +		buf_vec++;
> +	}
> +}
> +
>   static __rte_always_inline int
>   copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   		  struct buf_vector *buf_vec, uint16_t nr_vec,
> @@ -1094,28 +1125,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   
>   	if (virtio_net_with_host_offload(dev)) {
>   		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> -			uint64_t len;
> -			uint64_t remain = sizeof(struct virtio_net_hdr);
> -			uint64_t src;
> -			uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
> -			uint16_t hdr_vec_idx = 0;
> -
> -			/*
> -			 * No luck, the virtio-net header doesn't fit
> -			 * in a contiguous virtual area.
> -			 */
> -			while (remain) {
> -				len = RTE_MIN(remain,
> -					buf_vec[hdr_vec_idx].buf_len);
> -				src = buf_vec[hdr_vec_idx].buf_addr;
> -				rte_memcpy((void *)(uintptr_t)dst,
> -						   (void *)(uintptr_t)src, len);
> -
> -				remain -= len;
> -				dst += len;
> -				hdr_vec_idx++;
> -			}
> -
> +			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
>   			hdr = &tmp_hdr;
>   		} else {
>   			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
>
  
Mattias Rönnblom May 21, 2019, 7:43 p.m. UTC | #2
On 2019-05-17 14:22, Maxime Coquelin wrote:
> Handling of fragmented virtio-net header and indirect descriptors
> tables was implemented to fix CVE-2018-1059. It should not never
> happen with healthy guests and so are already considered as
> unlikely code path.
> 
> This patch moves these bits into non-inline dedicated functions
> to reduce the I-cache pressure.
> 
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>   lib/librte_vhost/vhost.c      |  33 +++++++++++
>   lib/librte_vhost/vhost.h      |  35 +-----------
>   lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>   3 files changed, 91 insertions(+), 79 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 4a54ad6bd1..8a4379bc13 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   }
>   
>   
> +void *
> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,

This function should have a prefix.

> +		uint64_t desc_addr, uint64_t desc_len)
> +{
> +	void *idesc;
> +	uint64_t src, dst;
> +	uint64_t len, remain = desc_len;
> +
> +	idesc = rte_malloc(__func__, desc_len, 0);
> +	if (unlikely(!idesc))

if (idesc == NULL)

> +		return NULL;
> +
> +	dst = (uint64_t)(uintptr_t)idesc;
> +
> +	while (remain) {
remain > 0
> +		len = remain;
> +		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> +				VHOST_ACCESS_RO);
> +		if (unlikely(!src || !len)) {
> +			rte_free(idesc);
> +			return NULL;
> +		}
> +
> +		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);

Just for my understanding: what difference does that (uintptr_t) cast do?

> +
> +		remain -= len;
> +		dst += len;
> +		desc_addr += len;
> +	}
> +
> +	return idesc;
> +}
> +
>   void
>   cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>   {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3ab7b4950f..ab26454e1c 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>   
>   uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   			uint64_t iova, uint64_t *len, uint8_t perm);
> +void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +			uint64_t desc_addr, uint64_t desc_len);
>   int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>   void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>   
> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
>   		eventfd_write(vq->callfd, (eventfd_t)1);
>   }
>   
> -static __rte_always_inline void *
> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -		uint64_t desc_addr, uint64_t desc_len)
> -{
> -	void *idesc;
> -	uint64_t src, dst;
> -	uint64_t len, remain = desc_len;
> -
> -	idesc = rte_malloc(__func__, desc_len, 0);
> -	if (unlikely(!idesc))
> -		return 0;
> -
> -	dst = (uint64_t)(uintptr_t)idesc;
> -
> -	while (remain) {
> -		len = remain;
> -		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> -				VHOST_ACCESS_RO);
> -		if (unlikely(!src || !len)) {
> -			rte_free(idesc);
> -			return 0;
> -		}
> -
> -		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> -
> -		remain -= len;
> -		dst += len;
> -		desc_addr += len;
> -	}
> -
> -	return idesc;
> -}
> -
>   static __rte_always_inline void
>   free_ind_table(void *idesc)
>   {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 35ae4992c2..494dd9957e 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   	return 0;
>   }
>   
> +static void
> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,

__rte_noinline? Or you don't care about this function being inlined or not?

> +		struct buf_vector *buf_vec,
> +		struct virtio_net_hdr_mrg_rxbuf *hdr){
> +	uint64_t len;
> +	uint64_t remain = dev->vhost_hlen;
> +	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> +	uint64_t iova = buf_vec->buf_iova;
> +
> +	while (remain) {
remain > 0
> +		len = RTE_MIN(remain,
> +				buf_vec->buf_len);
> +		dst = buf_vec->buf_addr;
> +		rte_memcpy((void *)(uintptr_t)dst,
> +				(void *)(uintptr_t)src,
> +				len);
> +
> +		PRINT_PACKET(dev, (uintptr_t)dst,
> +				(uint32_t)len, 0);
> +		vhost_log_cache_write(dev, vq,
> +				iova, len);
> +
> +		remain -= len;
> +		iova += len;
> +		src += len;
> +		buf_vec++;
> +	}
> +}
> +
>   static __rte_always_inline int
>   copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   			    struct rte_mbuf *m, struct buf_vector *buf_vec,
> @@ -703,30 +732,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   						num_buffers);
>   
>   			if (unlikely(hdr == &tmp_hdr)) {
> -				uint64_t len;
> -				uint64_t remain = dev->vhost_hlen;
> -				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> -				uint64_t iova = buf_vec[0].buf_iova;
> -				uint16_t hdr_vec_idx = 0;
> -
> -				while (remain) {
> -					len = RTE_MIN(remain,
> -						buf_vec[hdr_vec_idx].buf_len);
> -					dst = buf_vec[hdr_vec_idx].buf_addr;
> -					rte_memcpy((void *)(uintptr_t)dst,
> -							(void *)(uintptr_t)src,
> -							len);
> -
> -					PRINT_PACKET(dev, (uintptr_t)dst,
> -							(uint32_t)len, 0);
> -					vhost_log_cache_write(dev, vq,
> -							iova, len);
> -
> -					remain -= len;
> -					iova += len;
> -					src += len;
> -					hdr_vec_idx++;
> -				}
> +				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
>   			} else {
>   				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
>   						dev->vhost_hlen, 0);
> @@ -1063,6 +1069,31 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
>   	}
>   }
>   
> +static void
> +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
> +		struct buf_vector *buf_vec)

__rte_noinline?

> +{
> +	uint64_t len;
> +	uint64_t remain = sizeof(struct virtio_net_hdr);
> +	uint64_t src;
> +	uint64_t dst = (uint64_t)(uintptr_t)&hdr;
> +
> +	/*
> +	 * No luck, the virtio-net header doesn't fit
> +	 * in a contiguous virtual area.
> +	 */
> +	while (remain) {
> +		len = RTE_MIN(remain, buf_vec->buf_len);
> +		src = buf_vec->buf_addr;
> +		rte_memcpy((void *)(uintptr_t)dst,
> +				(void *)(uintptr_t)src, len);
> +
> +		remain -= len;
> +		dst += len;
> +		buf_vec++;
> +	}
> +}
> +
>   static __rte_always_inline int
>   copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   		  struct buf_vector *buf_vec, uint16_t nr_vec,
> @@ -1094,28 +1125,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>   
>   	if (virtio_net_with_host_offload(dev)) {
>   		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> -			uint64_t len;
> -			uint64_t remain = sizeof(struct virtio_net_hdr);
> -			uint64_t src;
> -			uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
> -			uint16_t hdr_vec_idx = 0;
> -
> -			/*
> -			 * No luck, the virtio-net header doesn't fit
> -			 * in a contiguous virtual area.
> -			 */
> -			while (remain) {
> -				len = RTE_MIN(remain,
> -					buf_vec[hdr_vec_idx].buf_len);
> -				src = buf_vec[hdr_vec_idx].buf_addr;
> -				rte_memcpy((void *)(uintptr_t)dst,
> -						   (void *)(uintptr_t)src, len);
> -
> -				remain -= len;
> -				dst += len;
> -				hdr_vec_idx++;
> -			}
> -
> +			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
>   			hdr = &tmp_hdr;
>   		} else {
>   			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
>
  
Maxime Coquelin May 23, 2019, 2:30 p.m. UTC | #3
Hi Mattias,

On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
> On 2019-05-17 14:22, Maxime Coquelin wrote:
>> Handling of fragmented virtio-net header and indirect descriptors
>> tables was implemented to fix CVE-2018-1059. It should not never
>> happen with healthy guests and so are already considered as
>> unlikely code path.
>>
>> This patch moves these bits into non-inline dedicated functions
>> to reduce the I-cache pressure.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>   lib/librte_vhost/vhost.c      |  33 +++++++++++
>>   lib/librte_vhost/vhost.h      |  35 +-----------
>>   lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>>   3 files changed, 91 insertions(+), 79 deletions(-)
>>
>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>> index 4a54ad6bd1..8a4379bc13 100644
>> --- a/lib/librte_vhost/vhost.c
>> +++ b/lib/librte_vhost/vhost.c
>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, 
>> struct vhost_virtqueue *vq,
>>   }
>> +void *
>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> 
> This function should have a prefix.

This function is just moved from vhost.h to vhost.c, so not the purpose
of the patch.

But I agree your comment, I'll send a patch to add a prefix.

> 
>> +        uint64_t desc_addr, uint64_t desc_len)
>> +{
>> +    void *idesc;
>> +    uint64_t src, dst;
>> +    uint64_t len, remain = desc_len;
>> +
>> +    idesc = rte_malloc(__func__, desc_len, 0);
>> +    if (unlikely(!idesc))
> 
> if (idesc == NULL)

Ditto, that is not the purpose of the patch that is just moving the
function.

I agree this is not matching the coding rules specified in the
documentation, though.

> 
>> +        return NULL;
>> +
>> +    dst = (uint64_t)(uintptr_t)idesc;
>> +
>> +    while (remain) {
> remain > 0

Ditto.

>> +        len = remain;
>> +        src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>> +                VHOST_ACCESS_RO);
>> +        if (unlikely(!src || !len)) {
>> +            rte_free(idesc);
>> +            return NULL;
>> +        }
>> +
>> +        rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> 
> Just for my understanding: what difference does that (uintptr_t) cast do?

This is required to build 32bits (-Werror=int-to-pointer-cast)

>> +
>> +        remain -= len;
>> +        dst += len;
>> +        desc_addr += len;
>> +    }
>> +
>> +    return idesc;
>> +}
>> +
>>   void
>>   cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>>   {
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 3ab7b4950f..ab26454e1c 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>>   uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct 
>> vhost_virtqueue *vq,
>>               uint64_t iova, uint64_t *len, uint8_t perm);
>> +void *alloc_copy_ind_table(struct virtio_net *dev, struct 
>> vhost_virtqueue *vq,
>> +            uint64_t desc_addr, uint64_t desc_len);
>>   int vring_translate(struct virtio_net *dev, struct vhost_virtqueue 
>> *vq);
>>   void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue 
>> *vq);
>> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, 
>> struct vhost_virtqueue *vq)
>>           eventfd_write(vq->callfd, (eventfd_t)1);
>>   }
>> -static __rte_always_inline void *
>> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> -        uint64_t desc_addr, uint64_t desc_len)
>> -{
>> -    void *idesc;
>> -    uint64_t src, dst;
>> -    uint64_t len, remain = desc_len;
>> -
>> -    idesc = rte_malloc(__func__, desc_len, 0);
>> -    if (unlikely(!idesc))
>> -        return 0;
>> -
>> -    dst = (uint64_t)(uintptr_t)idesc;
>> -
>> -    while (remain) {
>> -        len = remain;
>> -        src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>> -                VHOST_ACCESS_RO);
>> -        if (unlikely(!src || !len)) {
>> -            rte_free(idesc);
>> -            return 0;
>> -        }
>> -
>> -        rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
>> -
>> -        remain -= len;
>> -        dst += len;
>> -        desc_addr += len;
>> -    }
>> -
>> -    return idesc;
>> -}
>> -
>>   static __rte_always_inline void
>>   free_ind_table(void *idesc)
>>   {
>> diff --git a/lib/librte_vhost/virtio_net.c 
>> b/lib/librte_vhost/virtio_net.c
>> index 35ae4992c2..494dd9957e 100644
>> --- a/lib/librte_vhost/virtio_net.c
>> +++ b/lib/librte_vhost/virtio_net.c
>> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, 
>> struct vhost_virtqueue *vq,
>>       return 0;
>>   }
>> +static void
>> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue 
>> *vq,
> 
> __rte_noinline? Or you don't care about this function being inlined or not?

Right, I'll add it here and there in next revision.

I'll try to send a patch to fix the kind of style issues you reported.
If you want to do it that would be great, just let me know.

Thanks,
Maxime
  
Mattias Rönnblom May 23, 2019, 3:17 p.m. UTC | #4
On 2019-05-23 16:30, Maxime Coquelin wrote:
> Hi Mattias,
> 
> On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
>> On 2019-05-17 14:22, Maxime Coquelin wrote:
>>> Handling of fragmented virtio-net header and indirect descriptors
>>> tables was implemented to fix CVE-2018-1059. It should not never
>>> happen with healthy guests and so are already considered as
>>> unlikely code path.
>>>
>>> This patch moves these bits into non-inline dedicated functions
>>> to reduce the I-cache pressure.
>>>
>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>> ---
>>>   lib/librte_vhost/vhost.c      |  33 +++++++++++
>>>   lib/librte_vhost/vhost.h      |  35 +-----------
>>>   lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>>>   3 files changed, 91 insertions(+), 79 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>> index 4a54ad6bd1..8a4379bc13 100644
>>> --- a/lib/librte_vhost/vhost.c
>>> +++ b/lib/librte_vhost/vhost.c
>>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, 
>>> struct vhost_virtqueue *vq,
>>>   }
>>> +void *
>>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue 
>>> *vq,
>>
>> This function should have a prefix.
> 
> This function is just moved from vhost.h to vhost.c, so not the purpose
> of the patch.
> 

It was declared "static inline" in the header file, and thus only 
affected those who included the file, as opposed to polluting the whole 
DPDK library name space.

> But I agree your comment, I'll send a patch to add a prefix.
> 
>>
>>> +        uint64_t desc_addr, uint64_t desc_len)
>>> +{
>>> +    void *idesc;
>>> +    uint64_t src, dst;
>>> +    uint64_t len, remain = desc_len;
>>> +
>>> +    idesc = rte_malloc(__func__, desc_len, 0);
>>> +    if (unlikely(!idesc))
>>
>> if (idesc == NULL)
> 
> Ditto, that is not the purpose of the patch that is just moving the
> function.
> 
> I agree this is not matching the coding rules specified in the
> documentation, though.
> 
>>
>>> +        return NULL;
>>> +
>>> +    dst = (uint64_t)(uintptr_t)idesc;
>>> +
>>> +    while (remain) {
>> remain > 0
> 
> Ditto.
> 
>>> +        len = remain;
>>> +        src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>>> +                VHOST_ACCESS_RO);
>>> +        if (unlikely(!src || !len)) {
>>> +            rte_free(idesc);
>>> +            return NULL;
>>> +        }
>>> +
>>> +        rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, 
>>> len);
>>
>> Just for my understanding: what difference does that (uintptr_t) cast do?
> 
> This is required to build 32bits (-Werror=int-to-pointer-cast)
> 

Ah. Thanks.

>>> +
>>> +        remain -= len;
>>> +        dst += len;
>>> +        desc_addr += len;
>>> +    }
>>> +
>>> +    return idesc;
>>> +}
>>> +
>>>   void
>>>   cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>>>   {
>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>> index 3ab7b4950f..ab26454e1c 100644
>>> --- a/lib/librte_vhost/vhost.h
>>> +++ b/lib/librte_vhost/vhost.h
>>> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>>>   uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct 
>>> vhost_virtqueue *vq,
>>>               uint64_t iova, uint64_t *len, uint8_t perm);
>>> +void *alloc_copy_ind_table(struct virtio_net *dev, struct 
>>> vhost_virtqueue *vq,
>>> +            uint64_t desc_addr, uint64_t desc_len);
>>>   int vring_translate(struct virtio_net *dev, struct vhost_virtqueue 
>>> *vq);
>>>   void vring_invalidate(struct virtio_net *dev, struct 
>>> vhost_virtqueue *vq);
>>> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, 
>>> struct vhost_virtqueue *vq)
>>>           eventfd_write(vq->callfd, (eventfd_t)1);
>>>   }
>>> -static __rte_always_inline void *
>>> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue 
>>> *vq,
>>> -        uint64_t desc_addr, uint64_t desc_len)
>>> -{
>>> -    void *idesc;
>>> -    uint64_t src, dst;
>>> -    uint64_t len, remain = desc_len;
>>> -
>>> -    idesc = rte_malloc(__func__, desc_len, 0);
>>> -    if (unlikely(!idesc))
>>> -        return 0;
>>> -
>>> -    dst = (uint64_t)(uintptr_t)idesc;
>>> -
>>> -    while (remain) {
>>> -        len = remain;
>>> -        src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>>> -                VHOST_ACCESS_RO);
>>> -        if (unlikely(!src || !len)) {
>>> -            rte_free(idesc);
>>> -            return 0;
>>> -        }
>>> -
>>> -        rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, 
>>> len);
>>> -
>>> -        remain -= len;
>>> -        dst += len;
>>> -        desc_addr += len;
>>> -    }
>>> -
>>> -    return idesc;
>>> -}
>>> -
>>>   static __rte_always_inline void
>>>   free_ind_table(void *idesc)
>>>   {
>>> diff --git a/lib/librte_vhost/virtio_net.c 
>>> b/lib/librte_vhost/virtio_net.c
>>> index 35ae4992c2..494dd9957e 100644
>>> --- a/lib/librte_vhost/virtio_net.c
>>> +++ b/lib/librte_vhost/virtio_net.c
>>> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, 
>>> struct vhost_virtqueue *vq,
>>>       return 0;
>>>   }
>>> +static void
>>> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue 
>>> *vq,
>>
>> __rte_noinline? Or you don't care about this function being inlined or 
>> not?
> 
> Right, I'll add it here and there in next revision.
> 
> I'll try to send a patch to fix the kind of style issues you reported.
> If you want to do it that would be great, just let me know.
> 

I just figured it made sense to address some style issues when you were 
shuffling things around.
  
Maxime Coquelin May 23, 2019, 5:40 p.m. UTC | #5
On 5/23/19 5:17 PM, Mattias Rönnblom wrote:
> On 2019-05-23 16:30, Maxime Coquelin wrote:
>> Hi Mattias,
>>
>> On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
>>> On 2019-05-17 14:22, Maxime Coquelin wrote:
>>>> Handling of fragmented virtio-net header and indirect descriptors
>>>> tables was implemented to fix CVE-2018-1059. It should not never
>>>> happen with healthy guests and so are already considered as
>>>> unlikely code path.
>>>>
>>>> This patch moves these bits into non-inline dedicated functions
>>>> to reduce the I-cache pressure.
>>>>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>>   lib/librte_vhost/vhost.c      |  33 +++++++++++
>>>>   lib/librte_vhost/vhost.h      |  35 +-----------
>>>>   lib/librte_vhost/virtio_net.c | 102 
>>>> +++++++++++++++++++---------------
>>>>   3 files changed, 91 insertions(+), 79 deletions(-)
>>>>
>>>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>>> index 4a54ad6bd1..8a4379bc13 100644
>>>> --- a/lib/librte_vhost/vhost.c
>>>> +++ b/lib/librte_vhost/vhost.c
>>>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, 
>>>> struct vhost_virtqueue *vq,
>>>>   }
>>>> +void *
>>>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue 
>>>> *vq,
>>>
>>> This function should have a prefix.
>>
>> This function is just moved from vhost.h to vhost.c, so not the purpose
>> of the patch.
>>
> 
> It was declared "static inline" in the header file, and thus only 
> affected those who included the file, as opposed to polluting the whole 
> DPDK library name space.

Right, I'll fix the name in next revision.

Thanks,
Maxime
  

Patch

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 4a54ad6bd1..8a4379bc13 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -201,6 +201,39 @@  __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 
+void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint64_t desc_addr, uint64_t desc_len)
+{
+	void *idesc;
+	uint64_t src, dst;
+	uint64_t len, remain = desc_len;
+
+	idesc = rte_malloc(__func__, desc_len, 0);
+	if (unlikely(!idesc))
+		return NULL;
+
+	dst = (uint64_t)(uintptr_t)idesc;
+
+	while (remain) {
+		len = remain;
+		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+				VHOST_ACCESS_RO);
+		if (unlikely(!src || !len)) {
+			rte_free(idesc);
+			return NULL;
+		}
+
+		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+		remain -= len;
+		dst += len;
+		desc_addr += len;
+	}
+
+	return idesc;
+}
+
 void
 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3ab7b4950f..ab26454e1c 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -488,6 +488,8 @@  void vhost_backend_cleanup(struct virtio_net *dev);
 
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t iova, uint64_t *len, uint8_t perm);
+void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t desc_addr, uint64_t desc_len);
 int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
@@ -601,39 +603,6 @@  vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
 		eventfd_write(vq->callfd, (eventfd_t)1);
 }
 
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		uint64_t desc_addr, uint64_t desc_len)
-{
-	void *idesc;
-	uint64_t src, dst;
-	uint64_t len, remain = desc_len;
-
-	idesc = rte_malloc(__func__, desc_len, 0);
-	if (unlikely(!idesc))
-		return 0;
-
-	dst = (uint64_t)(uintptr_t)idesc;
-
-	while (remain) {
-		len = remain;
-		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
-				VHOST_ACCESS_RO);
-		if (unlikely(!src || !len)) {
-			rte_free(idesc);
-			return 0;
-		}
-
-		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
-		remain -= len;
-		dst += len;
-		desc_addr += len;
-	}
-
-	return idesc;
-}
-
 static __rte_always_inline void
 free_ind_table(void *idesc)
 {
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 35ae4992c2..494dd9957e 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -610,6 +610,35 @@  reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
+static void
+copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		struct buf_vector *buf_vec,
+		struct virtio_net_hdr_mrg_rxbuf *hdr){
+	uint64_t len;
+	uint64_t remain = dev->vhost_hlen;
+	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
+	uint64_t iova = buf_vec->buf_iova;
+
+	while (remain) {
+		len = RTE_MIN(remain,
+				buf_vec->buf_len);
+		dst = buf_vec->buf_addr;
+		rte_memcpy((void *)(uintptr_t)dst,
+				(void *)(uintptr_t)src,
+				len);
+
+		PRINT_PACKET(dev, (uintptr_t)dst,
+				(uint32_t)len, 0);
+		vhost_log_cache_write(dev, vq,
+				iova, len);
+
+		remain -= len;
+		iova += len;
+		src += len;
+		buf_vec++;
+	}
+}
+
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			    struct rte_mbuf *m, struct buf_vector *buf_vec,
@@ -703,30 +732,7 @@  copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 						num_buffers);
 
 			if (unlikely(hdr == &tmp_hdr)) {
-				uint64_t len;
-				uint64_t remain = dev->vhost_hlen;
-				uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
-				uint64_t iova = buf_vec[0].buf_iova;
-				uint16_t hdr_vec_idx = 0;
-
-				while (remain) {
-					len = RTE_MIN(remain,
-						buf_vec[hdr_vec_idx].buf_len);
-					dst = buf_vec[hdr_vec_idx].buf_addr;
-					rte_memcpy((void *)(uintptr_t)dst,
-							(void *)(uintptr_t)src,
-							len);
-
-					PRINT_PACKET(dev, (uintptr_t)dst,
-							(uint32_t)len, 0);
-					vhost_log_cache_write(dev, vq,
-							iova, len);
-
-					remain -= len;
-					iova += len;
-					src += len;
-					hdr_vec_idx++;
-				}
+				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
 			} else {
 				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
 						dev->vhost_hlen, 0);
@@ -1063,6 +1069,31 @@  vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	}
 }
 
+static void
+copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
+		struct buf_vector *buf_vec)
+{
+	uint64_t len;
+	uint64_t remain = sizeof(struct virtio_net_hdr);
+	uint64_t src;
+	uint64_t dst = (uint64_t)(uintptr_t)&hdr;
+
+	/*
+	 * No luck, the virtio-net header doesn't fit
+	 * in a contiguous virtual area.
+	 */
+	while (remain) {
+		len = RTE_MIN(remain, buf_vec->buf_len);
+		src = buf_vec->buf_addr;
+		rte_memcpy((void *)(uintptr_t)dst,
+				(void *)(uintptr_t)src, len);
+
+		remain -= len;
+		dst += len;
+		buf_vec++;
+	}
+}
+
 static __rte_always_inline int
 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		  struct buf_vector *buf_vec, uint16_t nr_vec,
@@ -1094,28 +1125,7 @@  copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 	if (virtio_net_with_host_offload(dev)) {
 		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
-			uint64_t len;
-			uint64_t remain = sizeof(struct virtio_net_hdr);
-			uint64_t src;
-			uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
-			uint16_t hdr_vec_idx = 0;
-
-			/*
-			 * No luck, the virtio-net header doesn't fit
-			 * in a contiguous virtual area.
-			 */
-			while (remain) {
-				len = RTE_MIN(remain,
-					buf_vec[hdr_vec_idx].buf_len);
-				src = buf_vec[hdr_vec_idx].buf_addr;
-				rte_memcpy((void *)(uintptr_t)dst,
-						   (void *)(uintptr_t)src, len);
-
-				remain -= len;
-				dst += len;
-				hdr_vec_idx++;
-			}
-
+			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
 			hdr = &tmp_hdr;
 		} else {
 			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);