vhost: merge repeated loop in vhost Tx
Checks
Commit Message
To improve vhost Tx performance, merge the repeated loops in eth_vhost_tx:
move the VLAN tag insertion from eth_vhost_tx into virtio_dev_rx_packed
and virtio_dev_rx_split, which saves one pass over the packet burst.
Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
Cc: stable@dpdk.org
Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
---
drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
lib/vhost/virtio_net.c | 21 +++++++++++++++++++++
2 files changed, 25 insertions(+), 21 deletions(-)
Comments
On 9/10/21 11:05, Gaoxiang Liu wrote:
> To improve performance of vhost Tx, merge repeated loop in eth_vhost_tx.
> Move "vlan insert" from eth_vhost_tx to virtio_dev_rx_packed
> and virtio_dev_rx_split to reduce a loop iteration.
>
> Fixes: f63d356ee993 ("net/vhost: insert/strip VLAN header in software")
> Cc: stable@dpdk.org
This kind of performance optimization should not be backported to stable
branches.
>
> Signed-off-by: Gaoxiang Liu <gaoxiangliu0@163.com>
> ---
> drivers/net/vhost/rte_eth_vhost.c | 25 ++++---------------------
> lib/vhost/virtio_net.c | 21 +++++++++++++++++++++
> 2 files changed, 25 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index a202931e9a..ae20550976 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> {
> struct vhost_queue *r = q;
> uint16_t i, nb_tx = 0;
> - uint16_t nb_send = 0;
> uint64_t nb_bytes = 0;
> uint64_t nb_missed = 0;
>
> @@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
> goto out;
>
> - for (i = 0; i < nb_bufs; i++) {
> - struct rte_mbuf *m = bufs[i];
> -
> - /* Do VLAN tag insertion */
> - if (m->ol_flags & PKT_TX_VLAN_PKT) {
> - int error = rte_vlan_insert(&m);
> - if (unlikely(error)) {
> - rte_pktmbuf_free(m);
> - continue;
> - }
> - }
> -
> - bufs[nb_send] = m;
> - ++nb_send;
> - }
> -
> /* Enqueue packets to guest RX queue */
> - while (nb_send) {
> + while (nb_bufs) {
> uint16_t nb_pkts;
> - uint16_t num = (uint16_t)RTE_MIN(nb_send,
> + uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
> VHOST_MAX_PKT_BURST);
>
> nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
> &bufs[nb_tx], num);
>
> nb_tx += nb_pkts;
> - nb_send -= nb_pkts;
> + nb_bufs -= nb_pkts;
> if (nb_pkts < num)
> break;
> }
> @@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> for (i = 0; likely(i < nb_tx); i++)
> nb_bytes += bufs[i]->pkt_len;
>
> - nb_missed = nb_bufs - nb_tx;
> + nb_missed = nb_bufs;
>
> r->stats.pkts += nb_tx;
> r->stats.bytes += nb_bytes;
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 8549afbbe1..2057f4e7fe 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
> uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
> uint16_t nr_vec = 0;
>
> + /* Do VLAN tag insertion */
> + if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
> + int error = rte_vlan_insert(&pkts[pkt_idx]);
> + if (unlikely(error)) {
> + rte_pktmbuf_free(pkts[pkt_idx]);
> + pkts[pkt_idx] = NULL;
> + continue;
> + }
> + }
> +
> if (unlikely(reserve_avail_buf_split(dev, vq,
> pkt_len, buf_vec, &num_buffers,
> avail_head, &nr_vec) < 0)) {
> @@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
> do {
> rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
>
> + /* Do VLAN tag insertion */
> + if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
> + int error = rte_vlan_insert(&pkts[pkt_idx]);
> + if (unlikely(error)) {
> + rte_pktmbuf_free(pkts[pkt_idx]);
> + pkts[pkt_idx] = NULL;
> + pkt_idx++;
> + continue;
> + }
> + }
> +
> if (count - pkt_idx >= PACKED_BATCH_SIZE) {
> if (!virtio_dev_rx_sync_batch_packed(dev, vq,
> &pkts[pkt_idx])) {
>
It would make sense to do the VLAN insertion in virtio_enqueue_offload
instead; that would also avoid the code duplication.
Regards,
Maxime
@@ -428,7 +428,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
struct vhost_queue *r = q;
uint16_t i, nb_tx = 0;
- uint16_t nb_send = 0;
uint64_t nb_bytes = 0;
uint64_t nb_missed = 0;
@@ -440,33 +439,17 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
goto out;
- for (i = 0; i < nb_bufs; i++) {
- struct rte_mbuf *m = bufs[i];
-
- /* Do VLAN tag insertion */
- if (m->ol_flags & PKT_TX_VLAN_PKT) {
- int error = rte_vlan_insert(&m);
- if (unlikely(error)) {
- rte_pktmbuf_free(m);
- continue;
- }
- }
-
- bufs[nb_send] = m;
- ++nb_send;
- }
-
/* Enqueue packets to guest RX queue */
- while (nb_send) {
+ while (nb_bufs) {
uint16_t nb_pkts;
- uint16_t num = (uint16_t)RTE_MIN(nb_send,
+ uint16_t num = (uint16_t)RTE_MIN(nb_bufs,
VHOST_MAX_PKT_BURST);
nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
&bufs[nb_tx], num);
nb_tx += nb_pkts;
- nb_send -= nb_pkts;
+ nb_bufs -= nb_pkts;
if (nb_pkts < num)
break;
}
@@ -474,7 +457,7 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
for (i = 0; likely(i < nb_tx); i++)
nb_bytes += bufs[i]->pkt_len;
- nb_missed = nb_bufs - nb_tx;
+ nb_missed = nb_bufs;
r->stats.pkts += nb_tx;
r->stats.bytes += nb_bytes;
@@ -1218,6 +1218,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
uint16_t nr_vec = 0;
+ /* Do VLAN tag insertion */
+ if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+ int error = rte_vlan_insert(&pkts[pkt_idx]);
+ if (unlikely(error)) {
+ rte_pktmbuf_free(pkts[pkt_idx]);
+ pkts[pkt_idx] = NULL;
+ continue;
+ }
+ }
+
if (unlikely(reserve_avail_buf_split(dev, vq,
pkt_len, buf_vec, &num_buffers,
avail_head, &nr_vec) < 0)) {
@@ -1490,6 +1500,17 @@ virtio_dev_rx_packed(struct virtio_net *dev,
do {
rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+ /* Do VLAN tag insertion */
+ if (pkts[pkt_idx]->ol_flags & PKT_TX_VLAN_PKT) {
+ int error = rte_vlan_insert(&pkts[pkt_idx]);
+ if (unlikely(error)) {
+ rte_pktmbuf_free(pkts[pkt_idx]);
+ pkts[pkt_idx] = NULL;
+ pkt_idx++;
+ continue;
+ }
+ }
+
if (count - pkt_idx >= PACKED_BATCH_SIZE) {
if (!virtio_dev_rx_sync_batch_packed(dev, vq,
&pkts[pkt_idx])) {