[5/5] net/virtio: optimize xmit enqueue for packed ring
Commit Message
This patch introduces an optimized enqueue function in packed
ring for the case that virtio net header can be prepended to
the unchained mbuf.
Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
1 file changed, 61 insertions(+), 2 deletions(-)
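For context, the fast path added below relies on the caller having already verified that the virtio net header can be prepended in place, which is why the prepend in the new function "cannot fail". The following is only a minimal sketch of the kind of check the TX path makes before taking the fast path; the variable names and the exact condition are assumptions based on the existing virtio TX code, not part of this patch:

    /* Sketch only: the header can be prepended in place when the mbuf is
     * a direct, unshared, single-segment buffer with enough headroom for
     * the virtio net header, so rte_pktmbuf_prepend() cannot fail.
     */
    int can_push = rte_mbuf_refcnt_read(txm) == 1 &&
                   RTE_MBUF_DIRECT(txm) &&
                   txm->nb_segs == 1 &&
                   rte_pktmbuf_headroom(txm) >= vq->hw->vtnet_hdr_size;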
Comments
On 2/19/19 11:59 AM, Tiwei Bie wrote:
> This patch introduces an optimized enqueue function in packed
> ring for the case that virtio net header can be prepended to
> the unchained mbuf.
>
> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> ---
> drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
> 1 file changed, 61 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> index 60fa3aa50..771d3c3f6 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
> vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
> }
>
> +static inline void
> +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
> + struct rte_mbuf *cookie,
> + int in_order)
> +{
> + struct virtqueue *vq = txvq->vq;
> + struct vring_packed_desc *dp;
> + struct vq_desc_extra *dxp;
> + uint16_t idx, id, flags;
> + uint16_t head_size = vq->hw->vtnet_hdr_size;
> + struct virtio_net_hdr *hdr;
> +
> + id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
> + idx = vq->vq_avail_idx;
> + dp = &vq->ring_packed.desc_packed[idx];
> +
> + dxp = &vq->vq_descx[id];
> + dxp->ndescs = 1;
> + dxp->cookie = cookie;
> +
> + flags = vq->avail_used_flags;
> +
> + /* prepend cannot fail, checked by caller */
> + hdr = (struct virtio_net_hdr *)
> + rte_pktmbuf_prepend(cookie, head_size);
> + cookie->pkt_len -= head_size;
> +
> + /* if offload disabled, hdr is not zeroed yet, do it now */
> + if (!vq->hw->has_tx_offload)
> + virtqueue_clear_net_hdr(hdr);
> + else
> + virtqueue_xmit_offload(hdr, cookie, true);
> +
> + dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
> + dp->len = cookie->data_len;
> + dp->id = id;
> +
> + if (++vq->vq_avail_idx >= vq->vq_nentries) {
> + vq->vq_avail_idx -= vq->vq_nentries;
> + vq->avail_wrap_counter ^= 1;
> + vq->avail_used_flags ^=
> + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> + }
> +
> + vq->vq_free_cnt--;
> +
> + if (!in_order) {
> + vq->vq_desc_head_idx = dxp->next;
> + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
> + vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
> + }
> +
> + virtio_wmb(vq->hw->weak_barriers);
> + dp->flags = flags;
> +}
> +
> static inline void
> virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
> uint16_t needed, int can_push, int in_order)
> @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
> }
>
> /* Enqueue Packet buffers */
> - virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
> - in_order);
> + if (can_push)
> + virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
> + else
> + virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
> + in_order);
>
> virtio_update_packet_stats(&txvq->stats, txm);
> }
>
I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
simplified to get rid of "can_push" now that this case has a dedicated
function?
Regards,
Maxime
On Thu, Feb 21, 2019 at 12:22:29PM +0100, Maxime Coquelin wrote:
> On 2/19/19 11:59 AM, Tiwei Bie wrote:
> > This patch introduces an optimized enqueue function in packed
> > ring for the case that virtio net header can be prepended to
> > the unchained mbuf.
> >
> > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > ---
> > drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
> > 1 file changed, 61 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
> > index 60fa3aa50..771d3c3f6 100644
> > --- a/drivers/net/virtio/virtio_rxtx.c
> > +++ b/drivers/net/virtio/virtio_rxtx.c
> > @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
> > vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
> > }
> > +static inline void
> > +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
> > + struct rte_mbuf *cookie,
> > + int in_order)
> > +{
> > + struct virtqueue *vq = txvq->vq;
> > + struct vring_packed_desc *dp;
> > + struct vq_desc_extra *dxp;
> > + uint16_t idx, id, flags;
> > + uint16_t head_size = vq->hw->vtnet_hdr_size;
> > + struct virtio_net_hdr *hdr;
> > +
> > + id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
> > + idx = vq->vq_avail_idx;
> > + dp = &vq->ring_packed.desc_packed[idx];
> > +
> > + dxp = &vq->vq_descx[id];
> > + dxp->ndescs = 1;
> > + dxp->cookie = cookie;
> > +
> > + flags = vq->avail_used_flags;
> > +
> > + /* prepend cannot fail, checked by caller */
> > + hdr = (struct virtio_net_hdr *)
> > + rte_pktmbuf_prepend(cookie, head_size);
> > + cookie->pkt_len -= head_size;
> > +
> > + /* if offload disabled, hdr is not zeroed yet, do it now */
> > + if (!vq->hw->has_tx_offload)
> > + virtqueue_clear_net_hdr(hdr);
> > + else
> > + virtqueue_xmit_offload(hdr, cookie, true);
> > +
> > + dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
> > + dp->len = cookie->data_len;
> > + dp->id = id;
> > +
> > + if (++vq->vq_avail_idx >= vq->vq_nentries) {
> > + vq->vq_avail_idx -= vq->vq_nentries;
> > + vq->avail_wrap_counter ^= 1;
> > + vq->avail_used_flags ^=
> > + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
> > + }
> > +
> > + vq->vq_free_cnt--;
> > +
> > + if (!in_order) {
> > + vq->vq_desc_head_idx = dxp->next;
> > + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
> > + vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
> > + }
> > +
> > + virtio_wmb(vq->hw->weak_barriers);
> > + dp->flags = flags;
> > +}
> > +
> > static inline void
> > virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
> > uint16_t needed, int can_push, int in_order)
> > @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
> > }
> > /* Enqueue Packet buffers */
> > - virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
> > - in_order);
> > + if (can_push)
> > + virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
> > + else
> > + virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
> > + in_order);
> > virtio_update_packet_stats(&txvq->stats, txm);
> > }
> >
>
> I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
> simplified to get rid of "can_push" now that this case has a dedicated
> function?
Yeah, I had the same thought. But on second thought, in the future we
may also want to prepend the net hdr to the mbuf even when its nb_segs
isn't 1, so I left it untouched.
Thanks,
Tiwei
On 2/21/19 1:25 PM, Tiwei Bie wrote:
> On Thu, Feb 21, 2019 at 12:22:29PM +0100, Maxime Coquelin wrote:
>> On 2/19/19 11:59 AM, Tiwei Bie wrote:
>>> This patch introduces an optimized enqueue function in packed
>>> ring for the case that virtio net header can be prepended to
>>> the unchained mbuf.
>>>
>>> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>>> ---
>>> drivers/net/virtio/virtio_rxtx.c | 63 +++++++++++++++++++++++++++++++-
>>> 1 file changed, 61 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
>>> index 60fa3aa50..771d3c3f6 100644
>>> --- a/drivers/net/virtio/virtio_rxtx.c
>>> +++ b/drivers/net/virtio/virtio_rxtx.c
>>> @@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
>>> vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
>>> }
>>> +static inline void
>>> +virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
>>> + struct rte_mbuf *cookie,
>>> + int in_order)
>>> +{
>>> + struct virtqueue *vq = txvq->vq;
>>> + struct vring_packed_desc *dp;
>>> + struct vq_desc_extra *dxp;
>>> + uint16_t idx, id, flags;
>>> + uint16_t head_size = vq->hw->vtnet_hdr_size;
>>> + struct virtio_net_hdr *hdr;
>>> +
>>> + id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
>>> + idx = vq->vq_avail_idx;
>>> + dp = &vq->ring_packed.desc_packed[idx];
>>> +
>>> + dxp = &vq->vq_descx[id];
>>> + dxp->ndescs = 1;
>>> + dxp->cookie = cookie;
>>> +
>>> + flags = vq->avail_used_flags;
>>> +
>>> + /* prepend cannot fail, checked by caller */
>>> + hdr = (struct virtio_net_hdr *)
>>> + rte_pktmbuf_prepend(cookie, head_size);
>>> + cookie->pkt_len -= head_size;
>>> +
>>> + /* if offload disabled, hdr is not zeroed yet, do it now */
>>> + if (!vq->hw->has_tx_offload)
>>> + virtqueue_clear_net_hdr(hdr);
>>> + else
>>> + virtqueue_xmit_offload(hdr, cookie, true);
>>> +
>>> + dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
>>> + dp->len = cookie->data_len;
>>> + dp->id = id;
>>> +
>>> + if (++vq->vq_avail_idx >= vq->vq_nentries) {
>>> + vq->vq_avail_idx -= vq->vq_nentries;
>>> + vq->avail_wrap_counter ^= 1;
>>> + vq->avail_used_flags ^=
>>> + VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
>>> + }
>>> +
>>> + vq->vq_free_cnt--;
>>> +
>>> + if (!in_order) {
>>> + vq->vq_desc_head_idx = dxp->next;
>>> + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
>>> + vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
>>> + }
>>> +
>>> + virtio_wmb(vq->hw->weak_barriers);
>>> + dp->flags = flags;
>>> +}
>>> +
>>> static inline void
>>> virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
>>> uint16_t needed, int can_push, int in_order)
>>> @@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
>>> }
>>> /* Enqueue Packet buffers */
>>> - virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
>>> - in_order);
>>> + if (can_push)
>>> + virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
>>> + else
>>> + virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
>>> + in_order);
>>> virtio_update_packet_stats(&txvq->stats, txm);
>>> }
>>>
>>
>> I like this patch, but shouldn't virtqueue_enqueue_xmit_packed() be
>> simplified to get rid of "can_push" now that this case has a dedicated
>> function?
>
> Yeah, I had the same thought. But on second thought, in the future we
> may also want to prepend the net hdr to the mbuf even when its nb_segs
> isn't 1, so I left it untouched.
Ok, that makes sense.
And I don't think making that change would affect the generated code much
anyway, as the "can_push"-related code will be stripped out at compile
time.
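For illustration only, here is a tiny standalone C example (not driver code) of why a constant argument to an inline function carries no runtime cost once the call site passes a literal value:

    #include <stdio.h>

    /* Toy model of the "can_push" parameter: when the call site passes a
     * literal constant, the compiler inlines the function, constant-folds
     * the condition and drops the dead branch, so no runtime check remains.
     */
    static inline void enqueue(int can_push)
    {
            if (can_push)
                    printf("fast path: prepend the header into the mbuf\n");
            else
                    printf("slow path: use a separate header descriptor\n");
    }

    int main(void)
    {
            enqueue(0); /* branch resolved at compile time after inlining */
            return 0;
    }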
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Thanks,
Maxime
> Thanks,
> Tiwei
>
@@ -623,6 +623,62 @@ virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
}
+static inline void
+virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
+ struct rte_mbuf *cookie,
+ int in_order)
+{
+ struct virtqueue *vq = txvq->vq;
+ struct vring_packed_desc *dp;
+ struct vq_desc_extra *dxp;
+ uint16_t idx, id, flags;
+ uint16_t head_size = vq->hw->vtnet_hdr_size;
+ struct virtio_net_hdr *hdr;
+
+ id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
+ idx = vq->vq_avail_idx;
+ dp = &vq->ring_packed.desc_packed[idx];
+
+ dxp = &vq->vq_descx[id];
+ dxp->ndescs = 1;
+ dxp->cookie = cookie;
+
+ flags = vq->avail_used_flags;
+
+ /* prepend cannot fail, checked by caller */
+ hdr = (struct virtio_net_hdr *)
+ rte_pktmbuf_prepend(cookie, head_size);
+ cookie->pkt_len -= head_size;
+
+ /* if offload disabled, hdr is not zeroed yet, do it now */
+ if (!vq->hw->has_tx_offload)
+ virtqueue_clear_net_hdr(hdr);
+ else
+ virtqueue_xmit_offload(hdr, cookie, true);
+
+ dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
+ dp->len = cookie->data_len;
+ dp->id = id;
+
+ if (++vq->vq_avail_idx >= vq->vq_nentries) {
+ vq->vq_avail_idx -= vq->vq_nentries;
+ vq->avail_wrap_counter ^= 1;
+ vq->avail_used_flags ^=
+ VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
+ }
+
+ vq->vq_free_cnt--;
+
+ if (!in_order) {
+ vq->vq_desc_head_idx = dxp->next;
+ if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+ vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
+ }
+
+ virtio_wmb(vq->hw->weak_barriers);
+ dp->flags = flags;
+}
+
static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
uint16_t needed, int can_push, int in_order)
@@ -1979,8 +2035,11 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
}
/* Enqueue Packet buffers */
- virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
- in_order);
+ if (can_push)
+ virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
+ else
+ virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
+ in_order);
virtio_update_packet_stats(&txvq->stats, txm);
}