[5/5] net/virtio: optimize xmit enqueue for packed ring

Message ID 20190219105951.31046-6-tiwei.bie@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Series: Fixes and enhancements for Tx path in Virtio PMD

Checks

Context               Check     Description
ci/checkpatch         success   coding style OK
ci/Intel-compilation  success   Compilation OK

Commit Message

Tiwei Bie Feb. 19, 2019, 10:59 a.m. UTC
  This patch introduces an optimized enqueue function in packed
ring for the case that virtio net header can be prepended to
the unchained mbuf.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 2 deletions(-)
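
For context, the "can_push" fast path introduced here only applies when the
virtio-net header can be prepended in place, i.e. the mbuf is unchained,
unshared, and has enough headroom. A minimal sketch of that condition follows;
the helper name is illustrative and the driver's real check also depends on
negotiated features (e.g. VIRTIO_F_ANY_LAYOUT), so treat it as an
approximation rather than the actual code.

#include <rte_mbuf.h>

/* Sketch only: approximate precondition for taking the fast path. */
static inline int
tx_can_push_hdr(const struct rte_mbuf *m, uint16_t hdr_size)
{
        return rte_mbuf_refcnt_read(m) == 1 &&          /* headroom is exclusively ours */
               m->nb_segs == 1 &&                       /* unchained mbuf */
               rte_pktmbuf_headroom(m) >= hdr_size;     /* virtio-net header fits */
}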
  

Comments

Maxime Coquelin Feb. 21, 2019, 11:22 a.m. UTC | #1
On 2/19/19 11:59 AM, Tiwei Bie wrote:
> This patch introduces an optimized enqueue function in packed
> ring for the case that virtio net header can be prepended to
> the unchained mbuf.
> 
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
>   drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
>   1 file changed, 61 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index 60fa3aa50..771d3c3f6 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
>   	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
>   }
>   
> +static inline void
> +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
> +				   struct rte_mbuf *cookie,
> +				   int in_order)
> +{
> +	struct virtqueue *vq = txvq->vq;
> +	struct vring_packed_desc *dp;
> +	struct vq_desc_extra *dxp;
> +	uint16_t idx, id, flags;
> +	uint16_t head_size = vq->hw->vtnet_hdr_size;
> +	struct virtio_net_hdr *hdr;
> +
> +	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
> +	idx = vq->vq_avail_idx;
> +	dp = &vq->ring_packed.desc_packed[idx];
> +
> +	dxp = &vq->vq_descx[id];
> +	dxp->ndescs = 1;
> +	dxp->cookie = cookie;
> +
> +	flags = vq->avail_used_flags;
> +
> +	/* prepend cannot fail, checked by caller */
> +	hdr = (struct virtio_net_hdr *)
> +		rte_pktmbuf_prepend(cookie, head_size);
> +	cookie->pkt_len -= head_size;
> +
> +	/* if offload disabled, hdr is not zeroed yet, do it now */
> +	if (!vq->hw->has_tx_offload)
> +		virtqueue_clear_net_hdr(hdr);
> +	else
> +		virtqueue_xmit_offload(hdr, cookie, true);
> +
> +	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
> +	dp->len  = cookie->data_len;
> +	dp->id   = id;
> +
> +	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> +		vq->vq_avail_idx -= vq->vq_nentries;
> +		vq->avail_wrap_counter ^= 1;
> +		vq->avail_used_flags ^=
> +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> +	}
> +
> +	vq->vq_free_cnt--;
> +
> +	if (!in_order) {
> +		vq->vq_desc_head_idx = dxp->next;
> +		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
> +			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
> +	}
> +
> +	virtio_wmb(vq->hw->weak_barriers);
> +	dp->flags = flags;
> +}
> +
>   static inline void
>   virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
>   			      uint16_t needed, int can_push, int in_order)
> @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
>   		}
>   
>   		/* Enqueue Packet buffers */
> -		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
> -					      in_order);
> +		if (can_push)
> +			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
> +		else
> +			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
> +						      in_order);
>   
>   		virtio_update_packet_stats(&txvq->stats, txm);
>   	}
> 

I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
simplified to get rid of "can_push" now that this case has a dedicated
function?

Regards,
Maxime
  
Tiwei Bie Feb. 21, 2019, 12:25 p.m. UTC | #2
On Thu, Feb 21, 2019 at 12:22:29PM +0100, Maxime Coquelin wrote:
> On 2/19/19 11:59 AM, Tiwei Bie wrote:
> > This patch introduces an optimized enqueue function in packed
> > ring for the case that virtio net header can be prepended to
> > the unchained mbuf.
> > 
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> >   drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
> >   1 file changed, 61 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 60fa3aa50..771d3c3f6 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
> >   	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
> >   }
> > +static inline void
> > +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
> > +				   struct rte_mbuf *cookie,
> > +				   int in_order)
> > +{
> > +	struct virtqueue *vq = txvq->vq;
> > +	struct vring_packed_desc *dp;
> > +	struct vq_desc_extra *dxp;
> > +	uint16_t idx, id, flags;
> > +	uint16_t head_size = vq->hw->vtnet_hdr_size;
> > +	struct virtio_net_hdr *hdr;
> > +
> > +	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
> > +	idx = vq->vq_avail_idx;
> > +	dp = &vq->ring_packed.desc_packed[idx];
> > +
> > +	dxp = &vq->vq_descx[id];
> > +	dxp->ndescs = 1;
> > +	dxp->cookie = cookie;
> > +
> > +	flags = vq->avail_used_flags;
> > +
> > +	/* prepend cannot fail, checked by caller */
> > +	hdr = (struct virtio_net_hdr *)
> > +		rte_pktmbuf_prepend(cookie, head_size);
> > +	cookie->pkt_len -= head_size;
> > +
> > +	/* if offload disabled, hdr is not zeroed yet, do it now */
> > +	if (!vq->hw->has_tx_offload)
> > +		virtqueue_clear_net_hdr(hdr);
> > +	else
> > +		virtqueue_xmit_offload(hdr, cookie, true);
> > +
> > +	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
> > +	dp->len  = cookie->data_len;
> > +	dp->id   = id;
> > +
> > +	if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > +		vq->vq_avail_idx -= vq->vq_nentries;
> > +		vq->avail_wrap_counter ^= 1;
> > +		vq->avail_used_flags ^=
> > +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > +	}
> > +
> > +	vq->vq_free_cnt--;
> > +
> > +	if (!in_order) {
> > +		vq->vq_desc_head_idx = dxp->next;
> > +		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
> > +			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
> > +	}
> > +
> > +	virtio_wmb(vq->hw->weak_barriers);
> > +	dp->flags = flags;
> > +}
> > +
> >   static inline void
> >   virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
> >   			      uint16_t needed, int can_push, int in_order)
> > @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
> >   		}
> >   		/* Enqueue Packet buffers */
> > -		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
> > -					      in_order);
> > +		if (can_push)
> > +			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
> > +		else
> > +			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
> > +						      in_order);
> >   		virtio_update_packet_stats(&txvq->stats, txm);
> >   	}
> > 
> 
> I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
> simplified to get rid of "can_push" now that this case has a dedicated
> function?

Yeah, I had the same thought. But on second thought, I think
we may also want to push the net hdr to the mbuf in the future
even if its nb_segs isn't 1, so I left it untouched.

Thanks,
Tiwei
  
Maxime Coquelin Feb. 21, 2019, 12:31 p.m. UTC | #3
On 2/21/19 1:25 PM, Tiwei Bie wrote:
> On Thu, Feb 21, 2019 at 12:22:29PM +0100, Maxime Coquelin wrote:
>> On 2/19/19 11:59 AM, Tiwei Bie wrote:
>>> This patch introduces an optimized enqueue function in packed
>>> ring for the case that virtio net header can be prepended to
>>> the unchained mbuf.
>>>
>>> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>>> ---
>>>    drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
>>>    1 file changed, 61 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>>> index 60fa3aa50..771d3c3f6 100644
>>> --- a/drivers/net/virtio/virtio_rxtx.c
>>> +++ b/drivers/net/virtio/virtio_rxtx.c
>>> @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
>>>    	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
>>>    }
>>> +static inline void
>>> +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
>>> +				   struct rte_mbuf *cookie,
>>> +				   int in_order)
>>> +{
>>> +	struct virtqueue *vq = txvq->vq;
>>> +	struct vring_packed_desc *dp;
>>> +	struct vq_desc_extra *dxp;
>>> +	uint16_t idx, id, flags;
>>> +	uint16_t head_size = vq->hw->vtnet_hdr_size;
>>> +	struct virtio_net_hdr *hdr;
>>> +
>>> +	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
>>> +	idx = vq->vq_avail_idx;
>>> +	dp = &vq->ring_packed.desc_packed[idx];
>>> +
>>> +	dxp = &vq->vq_descx[id];
>>> +	dxp->ndescs = 1;
>>> +	dxp->cookie = cookie;
>>> +
>>> +	flags = vq->avail_used_flags;
>>> +
>>> +	/* prepend cannot fail, checked by caller */
>>> +	hdr = (struct virtio_net_hdr *)
>>> +		rte_pktmbuf_prepend(cookie, head_size);
>>> +	cookie->pkt_len -= head_size;
>>> +
>>> +	/* if offload disabled, hdr is not zeroed yet, do it now */
>>> +	if (!vq->hw->has_tx_offload)
>>> +		virtqueue_clear_net_hdr(hdr);
>>> +	else
>>> +		virtqueue_xmit_offload(hdr, cookie, true);
>>> +
>>> +	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
>>> +	dp->len  = cookie->data_len;
>>> +	dp->id   = id;
>>> +
>>> +	if (++vq->vq_avail_idx >= vq->vq_nentries) {
>>> +		vq->vq_avail_idx -= vq->vq_nentries;
>>> +		vq->avail_wrap_counter ^= 1;
>>> +		vq->avail_used_flags ^=
>>> +			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>>> +	}
>>> +
>>> +	vq->vq_free_cnt--;
>>> +
>>> +	if (!in_order) {
>>> +		vq->vq_desc_head_idx = dxp->next;
>>> +		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
>>> +			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
>>> +	}
>>> +
>>> +	virtio_wmb(vq->hw->weak_barriers);
>>> +	dp->flags = flags;
>>> +}
>>> +
>>>    static inline void
>>>    virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
>>>    			      uint16_t needed, int can_push, int in_order)
>>> @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
>>>    		}
>>>    		/* Enqueue Packet buffers */
>>> -		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
>>> -					      in_order);
>>> +		if (can_push)
>>> +			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
>>> +		else
>>> +			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
>>> +						      in_order);
>>>    		virtio_update_packet_stats(&txvq->stats, txm);
>>>    	}
>>>
>>
>> I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
>> simplified to get rid of "can_push" now that this case has a dedicated
>> function?
> 
> Yeah, I had the same thought. But on second thought, I think
> we may also want to push the net hdr to the mbuf in the future
> even if its nb_segs isn't 1, so I left it untouched.

Ok, that makes sense.
And I think making that change wouldn't affect the generated code much
anyway, as the "can_push"-related branches are stripped out at compile
time.
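
To illustrate that point with a minimal, self-contained sketch (the names
below are made up for the example, not taken from the driver): when an inline
function receives a compile-time-constant flag, the compiler propagates the
constant and drops the dead branch, so keeping the unused parameter costs
nothing at run time.

#include <stdio.h>

struct pkt { int hdr_in_headroom; };

/* Stand-in for virtqueue_enqueue_xmit_packed(): can_push is always a
 * literal at the call sites, so after inlining the dead branch is removed. */
static inline void
enqueue_generic(struct pkt *p, int can_push)
{
        if (can_push)
                p->hdr_in_headroom = 1;  /* dead code when callers pass 0 */
        else
                p->hdr_in_headroom = 0;
}

int main(void)
{
        struct pkt p;

        enqueue_generic(&p, 0);  /* constant 0: the branch above is folded away */
        printf("hdr in headroom: %d\n", p.hdr_in_headroom);
        return 0;
}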

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime

> Thanks,
> Tiwei
>
  

Patch

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 60fa3aa50..771d3c3f6 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -623,6 +623,62 @@  virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
 }
 
+static inline void
+virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
+				   struct rte_mbuf *cookie,
+				   int in_order)
+{
+	struct virtqueue *vq = txvq->vq;
+	struct vring_packed_desc *dp;
+	struct vq_desc_extra *dxp;
+	uint16_t idx, id, flags;
+	uint16_t head_size = vq->hw->vtnet_hdr_size;
+	struct virtio_net_hdr *hdr;
+
+	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
+	idx = vq->vq_avail_idx;
+	dp = &vq->ring_packed.desc_packed[idx];
+
+	dxp = &vq->vq_descx[id];
+	dxp->ndescs = 1;
+	dxp->cookie = cookie;
+
+	flags = vq->avail_used_flags;
+
+	/* prepend cannot fail, checked by caller */
+	hdr = (struct virtio_net_hdr *)
+		rte_pktmbuf_prepend(cookie, head_size);
+	cookie->pkt_len -= head_size;
+
+	/* if offload disabled, hdr is not zeroed yet, do it now */
+	if (!vq->hw->has_tx_offload)
+		virtqueue_clear_net_hdr(hdr);
+	else
+		virtqueue_xmit_offload(hdr, cookie, true);
+
+	dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
+	dp->len  = cookie->data_len;
+	dp->id   = id;
+
+	if (++vq->vq_avail_idx >= vq->vq_nentries) {
+		vq->vq_avail_idx -= vq->vq_nentries;
+		vq->avail_wrap_counter ^= 1;
+		vq->avail_used_flags ^=
+			VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
+	}
+
+	vq->vq_free_cnt--;
+
+	if (!in_order) {
+		vq->vq_desc_head_idx = dxp->next;
+		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
+	}
+
+	virtio_wmb(vq->hw->weak_barriers);
+	dp->flags = flags;
+}
+
 static inline void
 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 			      uint16_t needed, int can_push, int in_order)
@@ -1979,8 +2035,11 @@  virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
 		}
 
 		/* Enqueue Packet buffers */
-		virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
-					      in_order);
+		if (can_push)
+			virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
+		else
+			virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
+						      in_order);
 
 		virtio_update_packet_stats(&txvq->stats, txm);
 	}