[3/5] vhost: do not inline unlikely fragmented buffers code
Checks
Commit Message
Handling of fragmented virtio-net header and indirect descriptors
tables was implemented to fix CVE-2018-1059. It should not never
happen with healthy guests and so are already considered as
unlikely code path.
This patch moves these bits into non-inline dedicated functions
to reduce the I-cache pressure.
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
lib/librte_vhost/vhost.c | 33 +++++++++++
lib/librte_vhost/vhost.h | 35 +-----------
lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
3 files changed, 91 insertions(+), 79 deletions(-)
Comments
On 5/17/19 2:22 PM, Maxime Coquelin wrote:
> Handling of fragmented virtio-net header and indirect descriptors
> tables was implemented to fix CVE-2018-1059. It should not never
> happen with healthy guests and so are already considered as
> unlikely code path.
>
> This patch moves these bits into non-inline dedicated functions
> to reduce the I-cache pressure.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/librte_vhost/vhost.c | 33 +++++++++++
> lib/librte_vhost/vhost.h | 35 +-----------
> lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
> 3 files changed, 91 insertions(+), 79 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 4a54ad6bd1..8a4379bc13 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> }
>
>
> +void *
> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint64_t desc_addr, uint64_t desc_len)
> +{
> + void *idesc;
> + uint64_t src, dst;
> + uint64_t len, remain = desc_len;
> +
> + idesc = rte_malloc(__func__, desc_len, 0);
> + if (unlikely(!idesc))
> + return NULL;
> +
> + dst = (uint64_t)(uintptr_t)idesc;
> +
> + while (remain) {
> + len = remain;
> + src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> + VHOST_ACCESS_RO);
> + if (unlikely(!src || !len)) {
> + rte_free(idesc);
> + return NULL;
> + }
> +
> + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> +
> + remain -= len;
> + dst += len;
> + desc_addr += len;
> + }
> +
> + return idesc;
> +}
> +
> void
> cleanup_vq(struct vhost_virtqueue *vq, int destroy)
> {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3ab7b4950f..ab26454e1c 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>
> uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> uint64_t iova, uint64_t *len, uint8_t perm);
> +void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint64_t desc_addr, uint64_t desc_len);
> int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
> void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>
> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
> eventfd_write(vq->callfd, (eventfd_t)1);
> }
>
> -static __rte_always_inline void *
> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> - uint64_t desc_addr, uint64_t desc_len)
> -{
> - void *idesc;
> - uint64_t src, dst;
> - uint64_t len, remain = desc_len;
> -
> - idesc = rte_malloc(__func__, desc_len, 0);
> - if (unlikely(!idesc))
> - return 0;
> -
> - dst = (uint64_t)(uintptr_t)idesc;
> -
> - while (remain) {
> - len = remain;
> - src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> - VHOST_ACCESS_RO);
> - if (unlikely(!src || !len)) {
> - rte_free(idesc);
> - return 0;
> - }
> -
> - rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> -
> - remain -= len;
> - dst += len;
> - desc_addr += len;
> - }
> -
> - return idesc;
> -}
> -
> static __rte_always_inline void
> free_ind_table(void *idesc)
> {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 35ae4992c2..494dd9957e 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> return 0;
> }
>
> +static void
> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + struct buf_vector *buf_vec,
> + struct virtio_net_hdr_mrg_rxbuf *hdr){
I seem to have missed above open bracket coding style issue while
running checkpatch.
Will fix in next revision.
> + uint64_t len;
> + uint64_t remain = dev->vhost_hlen;
> + uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> + uint64_t iova = buf_vec->buf_iova;
> +
> + while (remain) {
> + len = RTE_MIN(remain,
> + buf_vec->buf_len);
> + dst = buf_vec->buf_addr;
> + rte_memcpy((void *)(uintptr_t)dst,
> + (void *)(uintptr_t)src,
> + len);
> +
> + PRINT_PACKET(dev, (uintptr_t)dst,
> + (uint32_t)len, 0);
> + vhost_log_cache_write(dev, vq,
> + iova, len);
> +
> + remain -= len;
> + iova += len;
> + src += len;
> + buf_vec++;
> + }
> +}
> +
> static __rte_always_inline int
> copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct rte_mbuf *m, struct buf_vector *buf_vec,
> @@ -703,30 +732,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> num_buffers);
>
> if (unlikely(hdr == &tmp_hdr)) {
> - uint64_t len;
> - uint64_t remain = dev->vhost_hlen;
> - uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> - uint64_t iova = buf_vec[0].buf_iova;
> - uint16_t hdr_vec_idx = 0;
> -
> - while (remain) {
> - len = RTE_MIN(remain,
> - buf_vec[hdr_vec_idx].buf_len);
> - dst = buf_vec[hdr_vec_idx].buf_addr;
> - rte_memcpy((void *)(uintptr_t)dst,
> - (void *)(uintptr_t)src,
> - len);
> -
> - PRINT_PACKET(dev, (uintptr_t)dst,
> - (uint32_t)len, 0);
> - vhost_log_cache_write(dev, vq,
> - iova, len);
> -
> - remain -= len;
> - iova += len;
> - src += len;
> - hdr_vec_idx++;
> - }
> + copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
> } else {
> PRINT_PACKET(dev, (uintptr_t)hdr_addr,
> dev->vhost_hlen, 0);
> @@ -1063,6 +1069,31 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
> }
> }
>
> +static void
> +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
> + struct buf_vector *buf_vec)
> +{
> + uint64_t len;
> + uint64_t remain = sizeof(struct virtio_net_hdr);
> + uint64_t src;
> + uint64_t dst = (uint64_t)(uintptr_t)&hdr;
> +
> + /*
> + * No luck, the virtio-net header doesn't fit
> + * in a contiguous virtual area.
> + */
> + while (remain) {
> + len = RTE_MIN(remain, buf_vec->buf_len);
> + src = buf_vec->buf_addr;
> + rte_memcpy((void *)(uintptr_t)dst,
> + (void *)(uintptr_t)src, len);
> +
> + remain -= len;
> + dst += len;
> + buf_vec++;
> + }
> +}
> +
> static __rte_always_inline int
> copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct buf_vector *buf_vec, uint16_t nr_vec,
> @@ -1094,28 +1125,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
> if (virtio_net_with_host_offload(dev)) {
> if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> - uint64_t len;
> - uint64_t remain = sizeof(struct virtio_net_hdr);
> - uint64_t src;
> - uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
> - uint16_t hdr_vec_idx = 0;
> -
> - /*
> - * No luck, the virtio-net header doesn't fit
> - * in a contiguous virtual area.
> - */
> - while (remain) {
> - len = RTE_MIN(remain,
> - buf_vec[hdr_vec_idx].buf_len);
> - src = buf_vec[hdr_vec_idx].buf_addr;
> - rte_memcpy((void *)(uintptr_t)dst,
> - (void *)(uintptr_t)src, len);
> -
> - remain -= len;
> - dst += len;
> - hdr_vec_idx++;
> - }
> -
> + copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
> hdr = &tmp_hdr;
> } else {
> hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
>
On 2019-05-17 14:22, Maxime Coquelin wrote:
> Handling of fragmented virtio-net header and indirect descriptors
> tables was implemented to fix CVE-2018-1059. It should not never
> happen with healthy guests and so are already considered as
> unlikely code path.
>
> This patch moves these bits into non-inline dedicated functions
> to reduce the I-cache pressure.
>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> lib/librte_vhost/vhost.c | 33 +++++++++++
> lib/librte_vhost/vhost.h | 35 +-----------
> lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
> 3 files changed, 91 insertions(+), 79 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 4a54ad6bd1..8a4379bc13 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
> }
>
>
> +void *
> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
This function should have a prefix.
> + uint64_t desc_addr, uint64_t desc_len)
> +{
> + void *idesc;
> + uint64_t src, dst;
> + uint64_t len, remain = desc_len;
> +
> + idesc = rte_malloc(__func__, desc_len, 0);
> + if (unlikely(!idesc))
if (idesc == NULL)
> + return NULL;
> +
> + dst = (uint64_t)(uintptr_t)idesc;
> +
> + while (remain) {
remain > 0
> + len = remain;
> + src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> + VHOST_ACCESS_RO);
> + if (unlikely(!src || !len)) {
> + rte_free(idesc);
> + return NULL;
> + }
> +
> + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
Just for my understanding: what difference does that (uintptr_t) cast do?
> +
> + remain -= len;
> + dst += len;
> + desc_addr += len;
> + }
> +
> + return idesc;
> +}
> +
> void
> cleanup_vq(struct vhost_virtqueue *vq, int destroy)
> {
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 3ab7b4950f..ab26454e1c 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>
> uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
> uint64_t iova, uint64_t *len, uint8_t perm);
> +void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint64_t desc_addr, uint64_t desc_len);
> int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
> void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
>
> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
> eventfd_write(vq->callfd, (eventfd_t)1);
> }
>
> -static __rte_always_inline void *
> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
> - uint64_t desc_addr, uint64_t desc_len)
> -{
> - void *idesc;
> - uint64_t src, dst;
> - uint64_t len, remain = desc_len;
> -
> - idesc = rte_malloc(__func__, desc_len, 0);
> - if (unlikely(!idesc))
> - return 0;
> -
> - dst = (uint64_t)(uintptr_t)idesc;
> -
> - while (remain) {
> - len = remain;
> - src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
> - VHOST_ACCESS_RO);
> - if (unlikely(!src || !len)) {
> - rte_free(idesc);
> - return 0;
> - }
> -
> - rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
> -
> - remain -= len;
> - dst += len;
> - desc_addr += len;
> - }
> -
> - return idesc;
> -}
> -
> static __rte_always_inline void
> free_ind_table(void *idesc)
> {
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 35ae4992c2..494dd9957e 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
> return 0;
> }
>
> +static void
> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
__rte_noinline? Or you don't care about this function being inlined or not?
> + struct buf_vector *buf_vec,
> + struct virtio_net_hdr_mrg_rxbuf *hdr){
> + uint64_t len;
> + uint64_t remain = dev->vhost_hlen;
> + uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> + uint64_t iova = buf_vec->buf_iova;
> +
> + while (remain) {
remain > 0
> + len = RTE_MIN(remain,
> + buf_vec->buf_len);
> + dst = buf_vec->buf_addr;
> + rte_memcpy((void *)(uintptr_t)dst,
> + (void *)(uintptr_t)src,
> + len);
> +
> + PRINT_PACKET(dev, (uintptr_t)dst,
> + (uint32_t)len, 0);
> + vhost_log_cache_write(dev, vq,
> + iova, len);
> +
> + remain -= len;
> + iova += len;
> + src += len;
> + buf_vec++;
> + }
> +}
> +
> static __rte_always_inline int
> copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct rte_mbuf *m, struct buf_vector *buf_vec,
> @@ -703,30 +732,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> num_buffers);
>
> if (unlikely(hdr == &tmp_hdr)) {
> - uint64_t len;
> - uint64_t remain = dev->vhost_hlen;
> - uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
> - uint64_t iova = buf_vec[0].buf_iova;
> - uint16_t hdr_vec_idx = 0;
> -
> - while (remain) {
> - len = RTE_MIN(remain,
> - buf_vec[hdr_vec_idx].buf_len);
> - dst = buf_vec[hdr_vec_idx].buf_addr;
> - rte_memcpy((void *)(uintptr_t)dst,
> - (void *)(uintptr_t)src,
> - len);
> -
> - PRINT_PACKET(dev, (uintptr_t)dst,
> - (uint32_t)len, 0);
> - vhost_log_cache_write(dev, vq,
> - iova, len);
> -
> - remain -= len;
> - iova += len;
> - src += len;
> - hdr_vec_idx++;
> - }
> + copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
> } else {
> PRINT_PACKET(dev, (uintptr_t)hdr_addr,
> dev->vhost_hlen, 0);
> @@ -1063,6 +1069,31 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
> }
> }
>
> +static void
> +copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
> + struct buf_vector *buf_vec)
__rte_noinline?
> +{
> + uint64_t len;
> + uint64_t remain = sizeof(struct virtio_net_hdr);
> + uint64_t src;
> + uint64_t dst = (uint64_t)(uintptr_t)&hdr;
> +
> + /*
> + * No luck, the virtio-net header doesn't fit
> + * in a contiguous virtual area.
> + */
> + while (remain) {
> + len = RTE_MIN(remain, buf_vec->buf_len);
> + src = buf_vec->buf_addr;
> + rte_memcpy((void *)(uintptr_t)dst,
> + (void *)(uintptr_t)src, len);
> +
> + remain -= len;
> + dst += len;
> + buf_vec++;
> + }
> +}
> +
> static __rte_always_inline int
> copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
> struct buf_vector *buf_vec, uint16_t nr_vec,
> @@ -1094,28 +1125,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
> if (virtio_net_with_host_offload(dev)) {
> if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> - uint64_t len;
> - uint64_t remain = sizeof(struct virtio_net_hdr);
> - uint64_t src;
> - uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
> - uint16_t hdr_vec_idx = 0;
> -
> - /*
> - * No luck, the virtio-net header doesn't fit
> - * in a contiguous virtual area.
> - */
> - while (remain) {
> - len = RTE_MIN(remain,
> - buf_vec[hdr_vec_idx].buf_len);
> - src = buf_vec[hdr_vec_idx].buf_addr;
> - rte_memcpy((void *)(uintptr_t)dst,
> - (void *)(uintptr_t)src, len);
> -
> - remain -= len;
> - dst += len;
> - hdr_vec_idx++;
> - }
> -
> + copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
> hdr = &tmp_hdr;
> } else {
> hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
>
Hi Mattias,
On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
> On 2019-05-17 14:22, Maxime Coquelin wrote:
>> Handling of fragmented virtio-net header and indirect descriptors
>> tables was implemented to fix CVE-2018-1059. It should not never
>> happen with healthy guests and so are already considered as
>> unlikely code path.
>>
>> This patch moves these bits into non-inline dedicated functions
>> to reduce the I-cache pressure.
>>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>> lib/librte_vhost/vhost.c | 33 +++++++++++
>> lib/librte_vhost/vhost.h | 35 +-----------
>> lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>> 3 files changed, 91 insertions(+), 79 deletions(-)
>>
>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>> index 4a54ad6bd1..8a4379bc13 100644
>> --- a/lib/librte_vhost/vhost.c
>> +++ b/lib/librte_vhost/vhost.c
>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev,
>> struct vhost_virtqueue *vq,
>> }
>> +void *
>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
>
> This function should have a prefix.
This function is just moved from vhost.h to vhost.c, so not the purpose
of the patch.
But I agree your comment, I'll send a patch to add a prefix.
>
>> + uint64_t desc_addr, uint64_t desc_len)
>> +{
>> + void *idesc;
>> + uint64_t src, dst;
>> + uint64_t len, remain = desc_len;
>> +
>> + idesc = rte_malloc(__func__, desc_len, 0);
>> + if (unlikely(!idesc))
>
> if (idesc == NULL)
Ditto, that is not the purpose of the patch that is just moving the
function.
I agree this is not matching the coding rules specified in the
documentation, though.
>
>> + return NULL;
>> +
>> + dst = (uint64_t)(uintptr_t)idesc;
>> +
>> + while (remain) {
> remain > 0
Ditto.
>> + len = remain;
>> + src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>> + VHOST_ACCESS_RO);
>> + if (unlikely(!src || !len)) {
>> + rte_free(idesc);
>> + return NULL;
>> + }
>> +
>> + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
>
> Just for my understanding: what difference does that (uintptr_t) cast do?
This is required to build 32bits (-Werror=int-to-pointer-cast)
>> +
>> + remain -= len;
>> + dst += len;
>> + desc_addr += len;
>> + }
>> +
>> + return idesc;
>> +}
>> +
>> void
>> cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>> {
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 3ab7b4950f..ab26454e1c 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>> uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct
>> vhost_virtqueue *vq,
>> uint64_t iova, uint64_t *len, uint8_t perm);
>> +void *alloc_copy_ind_table(struct virtio_net *dev, struct
>> vhost_virtqueue *vq,
>> + uint64_t desc_addr, uint64_t desc_len);
>> int vring_translate(struct virtio_net *dev, struct vhost_virtqueue
>> *vq);
>> void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue
>> *vq);
>> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev,
>> struct vhost_virtqueue *vq)
>> eventfd_write(vq->callfd, (eventfd_t)1);
>> }
>> -static __rte_always_inline void *
>> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
>> - uint64_t desc_addr, uint64_t desc_len)
>> -{
>> - void *idesc;
>> - uint64_t src, dst;
>> - uint64_t len, remain = desc_len;
>> -
>> - idesc = rte_malloc(__func__, desc_len, 0);
>> - if (unlikely(!idesc))
>> - return 0;
>> -
>> - dst = (uint64_t)(uintptr_t)idesc;
>> -
>> - while (remain) {
>> - len = remain;
>> - src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>> - VHOST_ACCESS_RO);
>> - if (unlikely(!src || !len)) {
>> - rte_free(idesc);
>> - return 0;
>> - }
>> -
>> - rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
>> -
>> - remain -= len;
>> - dst += len;
>> - desc_addr += len;
>> - }
>> -
>> - return idesc;
>> -}
>> -
>> static __rte_always_inline void
>> free_ind_table(void *idesc)
>> {
>> diff --git a/lib/librte_vhost/virtio_net.c
>> b/lib/librte_vhost/virtio_net.c
>> index 35ae4992c2..494dd9957e 100644
>> --- a/lib/librte_vhost/virtio_net.c
>> +++ b/lib/librte_vhost/virtio_net.c
>> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev,
>> struct vhost_virtqueue *vq,
>> return 0;
>> }
>> +static void
>> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue
>> *vq,
>
> __rte_noinline? Or you don't care about this function being inlined or not?
Right, I'll add it here and there in next revision.
I'll try to send a patch to fix the kind of style issues you reported.
If you want to do it that would be great, just let me know.
Thanks,
Maxime
On 2019-05-23 16:30, Maxime Coquelin wrote:
> Hi Mattias,
>
> On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
>> On 2019-05-17 14:22, Maxime Coquelin wrote:
>>> Handling of fragmented virtio-net header and indirect descriptors
>>> tables was implemented to fix CVE-2018-1059. It should not never
>>> happen with healthy guests and so are already considered as
>>> unlikely code path.
>>>
>>> This patch moves these bits into non-inline dedicated functions
>>> to reduce the I-cache pressure.
>>>
>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>> ---
>>> lib/librte_vhost/vhost.c | 33 +++++++++++
>>> lib/librte_vhost/vhost.h | 35 +-----------
>>> lib/librte_vhost/virtio_net.c | 102 +++++++++++++++++++---------------
>>> 3 files changed, 91 insertions(+), 79 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>> index 4a54ad6bd1..8a4379bc13 100644
>>> --- a/lib/librte_vhost/vhost.c
>>> +++ b/lib/librte_vhost/vhost.c
>>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev,
>>> struct vhost_virtqueue *vq,
>>> }
>>> +void *
>>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue
>>> *vq,
>>
>> This function should have a prefix.
>
> This function is just moved from vhost.h to vhost.c, so not the purpose
> of the patch.
>
It was declared "static inline" in the header file, and thus only
affected those who included the file, as opposed to polluting the whole
DPDK library name space.
> But I agree your comment, I'll send a patch to add a prefix.
>
>>
>>> + uint64_t desc_addr, uint64_t desc_len)
>>> +{
>>> + void *idesc;
>>> + uint64_t src, dst;
>>> + uint64_t len, remain = desc_len;
>>> +
>>> + idesc = rte_malloc(__func__, desc_len, 0);
>>> + if (unlikely(!idesc))
>>
>> if (idesc == NULL)
>
> Ditto, that is not the purpose of the patch that is just moving the
> function.
>
> I agree this is not matching the coding rules specified in the
> documentation, though.
>
>>
>>> + return NULL;
>>> +
>>> + dst = (uint64_t)(uintptr_t)idesc;
>>> +
>>> + while (remain) {
>> remain > 0
>
> Ditto.
>
>>> + len = remain;
>>> + src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>>> + VHOST_ACCESS_RO);
>>> + if (unlikely(!src || !len)) {
>>> + rte_free(idesc);
>>> + return NULL;
>>> + }
>>> +
>>> + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src,
>>> len);
>>
>> Just for my understanding: what difference does that (uintptr_t) cast do?
>
> This is required to build 32bits (-Werror=int-to-pointer-cast)
>
Ah. Thanks.
>>> +
>>> + remain -= len;
>>> + dst += len;
>>> + desc_addr += len;
>>> + }
>>> +
>>> + return idesc;
>>> +}
>>> +
>>> void
>>> cleanup_vq(struct vhost_virtqueue *vq, int destroy)
>>> {
>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>> index 3ab7b4950f..ab26454e1c 100644
>>> --- a/lib/librte_vhost/vhost.h
>>> +++ b/lib/librte_vhost/vhost.h
>>> @@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
>>> uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct
>>> vhost_virtqueue *vq,
>>> uint64_t iova, uint64_t *len, uint8_t perm);
>>> +void *alloc_copy_ind_table(struct virtio_net *dev, struct
>>> vhost_virtqueue *vq,
>>> + uint64_t desc_addr, uint64_t desc_len);
>>> int vring_translate(struct virtio_net *dev, struct vhost_virtqueue
>>> *vq);
>>> void vring_invalidate(struct virtio_net *dev, struct
>>> vhost_virtqueue *vq);
>>> @@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev,
>>> struct vhost_virtqueue *vq)
>>> eventfd_write(vq->callfd, (eventfd_t)1);
>>> }
>>> -static __rte_always_inline void *
>>> -alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue
>>> *vq,
>>> - uint64_t desc_addr, uint64_t desc_len)
>>> -{
>>> - void *idesc;
>>> - uint64_t src, dst;
>>> - uint64_t len, remain = desc_len;
>>> -
>>> - idesc = rte_malloc(__func__, desc_len, 0);
>>> - if (unlikely(!idesc))
>>> - return 0;
>>> -
>>> - dst = (uint64_t)(uintptr_t)idesc;
>>> -
>>> - while (remain) {
>>> - len = remain;
>>> - src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
>>> - VHOST_ACCESS_RO);
>>> - if (unlikely(!src || !len)) {
>>> - rte_free(idesc);
>>> - return 0;
>>> - }
>>> -
>>> - rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src,
>>> len);
>>> -
>>> - remain -= len;
>>> - dst += len;
>>> - desc_addr += len;
>>> - }
>>> -
>>> - return idesc;
>>> -}
>>> -
>>> static __rte_always_inline void
>>> free_ind_table(void *idesc)
>>> {
>>> diff --git a/lib/librte_vhost/virtio_net.c
>>> b/lib/librte_vhost/virtio_net.c
>>> index 35ae4992c2..494dd9957e 100644
>>> --- a/lib/librte_vhost/virtio_net.c
>>> +++ b/lib/librte_vhost/virtio_net.c
>>> @@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev,
>>> struct vhost_virtqueue *vq,
>>> return 0;
>>> }
>>> +static void
>>> +copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue
>>> *vq,
>>
>> __rte_noinline? Or you don't care about this function being inlined or
>> not?
>
> Right, I'll add it here and there in next revision.
>
> I'll try to send a patch to fix the kind of style issues you reported.
> If you want to do it that would be great, just let me know.
>
I just figured it made sense to address some style issues when you were
shuffling things around.
On 5/23/19 5:17 PM, Mattias Rönnblom wrote:
> On 2019-05-23 16:30, Maxime Coquelin wrote:
>> Hi Mattias,
>>
>> On 5/21/19 9:43 PM, Mattias Rönnblom wrote:
>>> On 2019-05-17 14:22, Maxime Coquelin wrote:
>>>> Handling of fragmented virtio-net header and indirect descriptors
>>>> tables was implemented to fix CVE-2018-1059. It should not never
>>>> happen with healthy guests and so are already considered as
>>>> unlikely code path.
>>>>
>>>> This patch moves these bits into non-inline dedicated functions
>>>> to reduce the I-cache pressure.
>>>>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>> lib/librte_vhost/vhost.c | 33 +++++++++++
>>>> lib/librte_vhost/vhost.h | 35 +-----------
>>>> lib/librte_vhost/virtio_net.c | 102
>>>> +++++++++++++++++++---------------
>>>> 3 files changed, 91 insertions(+), 79 deletions(-)
>>>>
>>>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>>> index 4a54ad6bd1..8a4379bc13 100644
>>>> --- a/lib/librte_vhost/vhost.c
>>>> +++ b/lib/librte_vhost/vhost.c
>>>> @@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev,
>>>> struct vhost_virtqueue *vq,
>>>> }
>>>> +void *
>>>> +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue
>>>> *vq,
>>>
>>> This function should have a prefix.
>>
>> This function is just moved from vhost.h to vhost.c, so not the purpose
>> of the patch.
>>
>
> It was declared "static inline" in the header file, and thus only
> affected those who included the file, as opposed to polluting the whole
> DPDK library name space.
Right, I'll fix the name in next revision.
Thanks,
Maxime
@@ -201,6 +201,39 @@ __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
+void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t desc_addr, uint64_t desc_len)
+{
+ void *idesc;
+ uint64_t src, dst;
+ uint64_t len, remain = desc_len;
+
+ idesc = rte_malloc(__func__, desc_len, 0);
+ if (unlikely(!idesc))
+ return NULL;
+
+ dst = (uint64_t)(uintptr_t)idesc;
+
+ while (remain) {
+ len = remain;
+ src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!src || !len)) {
+ rte_free(idesc);
+ return NULL;
+ }
+
+ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ desc_addr += len;
+ }
+
+ return idesc;
+}
+
void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
@@ -488,6 +488,8 @@ void vhost_backend_cleanup(struct virtio_net *dev);
uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t iova, uint64_t *len, uint8_t perm);
+void *alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t desc_addr, uint64_t desc_len);
int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
@@ -601,39 +603,6 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
eventfd_write(vq->callfd, (eventfd_t)1);
}
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t desc_addr, uint64_t desc_len)
-{
- void *idesc;
- uint64_t src, dst;
- uint64_t len, remain = desc_len;
-
- idesc = rte_malloc(__func__, desc_len, 0);
- if (unlikely(!idesc))
- return 0;
-
- dst = (uint64_t)(uintptr_t)idesc;
-
- while (remain) {
- len = remain;
- src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
- VHOST_ACCESS_RO);
- if (unlikely(!src || !len)) {
- rte_free(idesc);
- return 0;
- }
-
- rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
- remain -= len;
- dst += len;
- desc_addr += len;
- }
-
- return idesc;
-}
-
static __rte_always_inline void
free_ind_table(void *idesc)
{
@@ -610,6 +610,35 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
}
+static void
+copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct buf_vector *buf_vec,
+ struct virtio_net_hdr_mrg_rxbuf *hdr){
+ uint64_t len;
+ uint64_t remain = dev->vhost_hlen;
+ uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
+ uint64_t iova = buf_vec->buf_iova;
+
+ while (remain) {
+ len = RTE_MIN(remain,
+ buf_vec->buf_len);
+ dst = buf_vec->buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src,
+ len);
+
+ PRINT_PACKET(dev, (uintptr_t)dst,
+ (uint32_t)len, 0);
+ vhost_log_cache_write(dev, vq,
+ iova, len);
+
+ remain -= len;
+ iova += len;
+ src += len;
+ buf_vec++;
+ }
+}
+
static __rte_always_inline int
copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
@@ -703,30 +732,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
num_buffers);
if (unlikely(hdr == &tmp_hdr)) {
- uint64_t len;
- uint64_t remain = dev->vhost_hlen;
- uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
- uint64_t iova = buf_vec[0].buf_iova;
- uint16_t hdr_vec_idx = 0;
-
- while (remain) {
- len = RTE_MIN(remain,
- buf_vec[hdr_vec_idx].buf_len);
- dst = buf_vec[hdr_vec_idx].buf_addr;
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src,
- len);
-
- PRINT_PACKET(dev, (uintptr_t)dst,
- (uint32_t)len, 0);
- vhost_log_cache_write(dev, vq,
- iova, len);
-
- remain -= len;
- iova += len;
- src += len;
- hdr_vec_idx++;
- }
+ copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
@@ -1063,6 +1069,31 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
}
}
+static void
+copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
+ struct buf_vector *buf_vec)
+{
+ uint64_t len;
+ uint64_t remain = sizeof(struct virtio_net_hdr);
+ uint64_t src;
+ uint64_t dst = (uint64_t)(uintptr_t)&hdr;
+
+ /*
+ * No luck, the virtio-net header doesn't fit
+ * in a contiguous virtual area.
+ */
+ while (remain) {
+ len = RTE_MIN(remain, buf_vec->buf_len);
+ src = buf_vec->buf_addr;
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ buf_vec++;
+ }
+}
+
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct buf_vector *buf_vec, uint16_t nr_vec,
@@ -1094,28 +1125,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (virtio_net_with_host_offload(dev)) {
if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
- uint64_t len;
- uint64_t remain = sizeof(struct virtio_net_hdr);
- uint64_t src;
- uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
- uint16_t hdr_vec_idx = 0;
-
- /*
- * No luck, the virtio-net header doesn't fit
- * in a contiguous virtual area.
- */
- while (remain) {
- len = RTE_MIN(remain,
- buf_vec[hdr_vec_idx].buf_len);
- src = buf_vec[hdr_vec_idx].buf_addr;
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src, len);
-
- remain -= len;
- dst += len;
- hdr_vec_idx++;
- }
-
+ copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
hdr = &tmp_hdr;
} else {
hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);