[dpdk-dev,v5,1/4] vhost/lib: add vhost TX offload capabilities in vhost lib

Message ID 1447330026-16685-2-git-send-email-jijiang.liu@intel.com (mailing list archive)
State Superseded, archived

Commit Message

Jijiang Liu Nov. 12, 2015, 12:07 p.m. UTC
  Add vhost TX offload (CSUM and TSO) support capabilities in the vhost lib.

Refer to the feature bits in Virtual I/O Device (VIRTIO) Version 1.0 below:

VIRTIO_NET_F_CSUM (0) Device handles packets with partial checksum. This "checksum offload" is a common feature on modern network cards.
VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4.
VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6.

In order to support these features, the following changes are added:

1. Extend 'VHOST_SUPPORTED_FEATURES' macro to add the offload features negotiation.

2. Dequeue TX offload: convert the fields in virtio_net_hdr to the related fields in the mbuf.
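
These feature bits are negotiated with the guest, so an application that cannot
handle offloaded traffic can mask them off before the vhost driver is registered.
A minimal sketch, assuming the existing rte_vhost_feature_disable() API and the
VIRTIO_NET_F_* definitions from linux/virtio_net.h:

#include <linux/virtio_net.h>   /* VIRTIO_NET_F_* feature bits */
#include <rte_virtio_net.h>     /* rte_vhost_feature_disable() */

/*
 * Sketch: opt out of the newly advertised TX offload feature bits when
 * the application cannot consume partial-checksum/TSO packets (e.g. it
 * forwards them to a NIC without the matching offloads). Call this
 * before rte_vhost_driver_register().
 */
static void
disable_vhost_tx_offload(void)
{
	rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
	rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
	rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
}

An application that does handle these offloads simply leaves the defaults in
place and consumes the ol_flags/tso_segsz metadata set on dequeued mbufs.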


Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_vhost/vhost_rxtx.c |  103 +++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/virtio-net.c |    6 ++-
 2 files changed, 108 insertions(+), 1 deletions(-)
  

Comments

Yuanhan Liu Nov. 13, 2015, 7:01 a.m. UTC | #1
On Thu, Nov 12, 2015 at 08:07:03PM +0800, Jijiang Liu wrote:
> Add vhost TX offload (CSUM and TSO) support capabilities in the vhost lib.
> 
> Refer to feature bits in Virtual I/O Device (VIRTIO) Version 1.0 below,
> 
> VIRTIO_NET_F_CSUM (0) Device handles packets with partial checksum. This "checksum offload" is a common feature on modern network cards.
> VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4.
> VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6.
> 
> In order to support these features, the following changes are added:
> 
> 1. Extend 'VHOST_SUPPORTED_FEATURES' macro to add the offload features negotiation.
> 
> 2. Dequeue TX offload: convert the fields in virtio_net_hdr to the related fields in the mbuf.
> 
> 
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
...
> +static void
> +parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
> +{
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct ipv6_hdr *ipv6_hdr;
> +	void *l3_hdr = NULL;
> +	struct ether_hdr *eth_hdr;
> +	uint16_t ethertype;
> +
> +	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
> +
> +	m->l2_len = sizeof(struct ether_hdr);
> +	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
> +
> +	if (ethertype == ETHER_TYPE_VLAN) {
> +		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
> +
> +		m->l2_len += sizeof(struct vlan_hdr);
> +		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
> +	}
> +
> +	l3_hdr = (char *)eth_hdr + m->l2_len;
> +
> +	switch (ethertype) {
> +	case ETHER_TYPE_IPv4:
> +		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
> +		*l4_proto = ipv4_hdr->next_proto_id;
> +		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
> +		*l4_hdr = (char *)l3_hdr + m->l3_len;
> +		m->ol_flags |= PKT_TX_IPV4;
> +		break;
> +	case ETHER_TYPE_IPv6:
> +		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
> +		*l4_proto = ipv6_hdr->proto;
> +		m->l3_len = sizeof(struct ipv6_hdr);
> +		*l4_hdr = (char *)l3_hdr + m->l3_len;
> +		m->ol_flags |= PKT_TX_IPV6;
> +		break;

Note that I'm still not that satisfied with putting all this kind
of calculation into the vhost library.

Every application requesting the TSO and CSUM offload features needs
to set these fields up, so I'm wondering _if_ we could put that into
a library, say lib_ether, and let the application just set a few key
fields and leave the rest to that lib (a rough sketch of such a
helper appears below).

That would keep us from touching those details, such as TCP and IP
headers, here and there. And that, IMO, would be a more elegant way
to leverage the hardware TSO and CSUM offload features.

I guess that would need some effort and more discussion, so I'm okay
with leaving it to a later version. (Hence, I gave my ack.)

(I know little about lib_ether and the DPDK hardware TSO settings,
so I could be wrong; sorry if so.)

	--yliu

> +	default:
> +		m->l3_len = 0;
> +		*l4_proto = 0;
> +		break;
> +	}
> +}
> +
> +static inline void __attribute__((always_inline))
> +vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
> +{
> +	uint16_t l4_proto = 0;
> +	void *l4_hdr = NULL;
> +	struct tcp_hdr *tcp_hdr = NULL;
> +
> +	parse_ethernet(m, &l4_proto, &l4_hdr);
> +	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> +		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
> +			switch (hdr->csum_offset) {
> +			case (offsetof(struct tcp_hdr, cksum)):
> +				if (l4_proto == IPPROTO_TCP)
> +					m->ol_flags |= PKT_TX_TCP_CKSUM;
> +				break;
> +			case (offsetof(struct udp_hdr, dgram_cksum)):
> +				if (l4_proto == IPPROTO_UDP)
> +					m->ol_flags |= PKT_TX_UDP_CKSUM;
> +				break;
> +			case (offsetof(struct sctp_hdr, cksum)):
> +				if (l4_proto == IPPROTO_SCTP)
> +					m->ol_flags |= PKT_TX_SCTP_CKSUM;
> +				break;
> +			default:
> +				break;
> +			}
> +		}
> +	}
> +
> +	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> +		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> +		case VIRTIO_NET_HDR_GSO_TCPV4:
> +		case VIRTIO_NET_HDR_GSO_TCPV6:
> +			tcp_hdr = (struct tcp_hdr *)l4_hdr;
> +			m->ol_flags |= PKT_TX_TCP_SEG;
> +			m->tso_segsz = hdr->gso_size;
> +			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
> +			break;
> +		default:
> +			RTE_LOG(WARNING, VHOST_DATA,
> +				"unsupported gso type %u.\n", hdr->gso_type);
> +			break;
> +		}
> +	}
> +}
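
A rough sketch of the kind of lib_ether-style helper suggested in the comment
above; this is purely hypothetical (pkt_tx_offload_prepare() is not an existing
DPDK API). It parses the L2/L3 headers once and fills the mbuf offload metadata,
so an application would only set the offload request flags (PKT_TX_TCP_SEG,
PKT_TX_*_CKSUM) and, for TSO, tso_segsz:

#include <rte_byteorder.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

/*
 * Hypothetical helper (not an existing DPDK API): parse the Ethernet,
 * VLAN and IP headers once and fill in m->l2_len, m->l3_len and the
 * PKT_TX_IPV4/PKT_TX_IPV6 flags, so callers do not have to open-code
 * the parsing that parse_ethernet() does in the vhost library.
 */
static inline void
pkt_tx_offload_prepare(struct rte_mbuf *m)
{
	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
	uint16_t ethertype = rte_be_to_cpu_16(eth->ether_type);

	m->l2_len = sizeof(struct ether_hdr);
	if (ethertype == ETHER_TYPE_VLAN) {
		struct vlan_hdr *vlan = (struct vlan_hdr *)(eth + 1);

		m->l2_len += sizeof(struct vlan_hdr);
		ethertype = rte_be_to_cpu_16(vlan->eth_proto);
	}

	if (ethertype == ETHER_TYPE_IPv4) {
		struct ipv4_hdr *ip =
			(struct ipv4_hdr *)((char *)eth + m->l2_len);

		m->l3_len = (ip->version_ihl & 0x0f) * 4;
		m->ol_flags |= PKT_TX_IPV4;
	} else if (ethertype == ETHER_TYPE_IPv6) {
		m->l3_len = sizeof(struct ipv6_hdr);
		m->ol_flags |= PKT_TX_IPV6;
	} else {
		m->l3_len = 0;
	}
}

With such a helper, the vhost dequeue path and application TX paths could share
a single parsing step instead of each open-coding it.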
  
Jijiang Liu Nov. 16, 2015, 7:56 a.m. UTC | #2
Hi Yuanhan,

> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Friday, November 13, 2015 3:02 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5 1/4] vhost/lib: add vhost TX offload
> capabilities in vhost lib
> 
> On Thu, Nov 12, 2015 at 08:07:03PM +0800, Jijiang Liu wrote:
> > Add vhost TX offload (CSUM and TSO) support capabilities in the vhost lib.
> >
> > Refer to feature bits in Virtual I/O Device (VIRTIO) Version 1.0
> > below,
> >
> > VIRTIO_NET_F_CSUM (0) Device handles packets with partial checksum.
> This "checksum offload" is a common feature on modern network cards.
> > VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4.
> > VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6.
> >
> > In order to support these features, the following changes are
> > added:
> >
> > 1. Extend 'VHOST_SUPPORTED_FEATURES' macro to add the offload
> features negotiation.
> >
> > 2. Dequeue TX offload: convert the fields in virtio_net_hdr to the related
> fields in the mbuf.
> >
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> ...
> > +static void
> > +parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
> > +{
> > +	struct ipv4_hdr *ipv4_hdr;
> > +	struct ipv6_hdr *ipv6_hdr;
> > +	void *l3_hdr = NULL;
> > +	struct ether_hdr *eth_hdr;
> > +	uint16_t ethertype;
> > +
> > +	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
> > +
> > +	m->l2_len = sizeof(struct ether_hdr);
> > +	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
> > +
> > +	if (ethertype == ETHER_TYPE_VLAN) {
> > +		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
> > +
> > +		m->l2_len += sizeof(struct vlan_hdr);
> > +		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
> > +	}
> > +
> > +	l3_hdr = (char *)eth_hdr + m->l2_len;
> > +
> > +	switch (ethertype) {
> > +	case ETHER_TYPE_IPv4:
> > +		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
> > +		*l4_proto = ipv4_hdr->next_proto_id;
> > +		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
> > +		*l4_hdr = (char *)l3_hdr + m->l3_len;
> > +		m->ol_flags |= PKT_TX_IPV4;
> > +		break;
> > +	case ETHER_TYPE_IPv6:
> > +		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
> > +		*l4_proto = ipv6_hdr->proto;
> > +		m->l3_len = sizeof(struct ipv6_hdr);
> > +		*l4_hdr = (char *)l3_hdr + m->l3_len;
> > +		m->ol_flags |= PKT_TX_IPV6;
> > +		break;
> 
> Note that I'm still not that satisfied with putting all this kind of calculation
> into the vhost library.
> 
> Every application requesting the TSO and CSUM offload features needs to set
> these fields up, so I'm wondering _if_ we could put that into a library, say
> lib_ether, and let the application just set a few key fields and leave the rest
> to that lib.
> 
> That would keep us from touching those details, such as TCP and IP headers,
> here and there. And that, IMO, would be a more elegant way to leverage the
> hardware TSO and CSUM offload features.
> 
> I guess that would need some effort and more discussion, so I'm okay with
> leaving it to a later version. (Hence, I gave my ack.)
> 
> (I know little about lib_ether and the DPDK hardware TSO settings, so I could
> be wrong; sorry if so.)

Your suggestion is good; I also think we should add some L2/L3 protocol parsing to the DPDK libs.
As you said, this needs more discussion, so maybe we can do it in the future.

For now, though, it is necessary to add the parse_ethernet() function here to get the essential information.

> 
> 	--yliu
> 
> > +	default:
> > +		m->l3_len = 0;
> > +		*l4_proto = 0;
> > +		break;
> > +	}
> > +}
> > +
> > +static inline void __attribute__((always_inline))
> > +vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
> > +{
> > +	uint16_t l4_proto = 0;
> > +	void *l4_hdr = NULL;
> > +	struct tcp_hdr *tcp_hdr = NULL;
> > +
> > +	parse_ethernet(m, &l4_proto, &l4_hdr);
> > +	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> > +		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
> > +			switch (hdr->csum_offset) {
> > +			case (offsetof(struct tcp_hdr, cksum)):
> > +				if (l4_proto == IPPROTO_TCP)
> > +					m->ol_flags |= PKT_TX_TCP_CKSUM;
> > +				break;
> > +			case (offsetof(struct udp_hdr, dgram_cksum)):
> > +				if (l4_proto == IPPROTO_UDP)
> > +					m->ol_flags |= PKT_TX_UDP_CKSUM;
> > +				break;
> > +			case (offsetof(struct sctp_hdr, cksum)):
> > +				if (l4_proto == IPPROTO_SCTP)
> > +					m->ol_flags |= PKT_TX_SCTP_CKSUM;
> > +				break;
> > +			default:
> > +				break;
> > +			}
> > +		}
> > +	}
> > +
> > +	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> > +		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> > +		case VIRTIO_NET_HDR_GSO_TCPV4:
> > +		case VIRTIO_NET_HDR_GSO_TCPV6:
> > +			tcp_hdr = (struct tcp_hdr *)l4_hdr;
> > +			m->ol_flags |= PKT_TX_TCP_SEG;
> > +			m->tso_segsz = hdr->gso_size;
> > +			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
> > +			break;
> > +		default:
> > +			RTE_LOG(WARNING, VHOST_DATA,
> > +				"unsupported gso type %u.\n", hdr->gso_type);
> > +			break;
> > +		}
> > +	}
> > +}
  

Patch

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 9322ce6..47d5f85 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -37,7 +37,12 @@ 
 
 #include <rte_mbuf.h>
 #include <rte_memcpy.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
 #include <rte_virtio_net.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
 
 #include "vhost-net.h"
 
@@ -568,6 +573,97 @@  rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
 		return virtio_dev_rx(dev, queue_id, pkts, count);
 }
 
+static void
+parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	void *l3_hdr = NULL;
+	struct ether_hdr *eth_hdr;
+	uint16_t ethertype;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	m->l2_len = sizeof(struct ether_hdr);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (ethertype == ETHER_TYPE_VLAN) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		m->l2_len += sizeof(struct vlan_hdr);
+		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+	}
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		*l4_proto = ipv4_hdr->next_proto_id;
+		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV4;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+		*l4_proto = ipv6_hdr->proto;
+		m->l3_len = sizeof(struct ipv6_hdr);
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV6;
+		break;
+	default:
+		m->l3_len = 0;
+		*l4_proto = 0;
+		break;
+	}
+}
+
+static inline void __attribute__((always_inline))
+vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+{
+	uint16_t l4_proto = 0;
+	void *l4_hdr = NULL;
+	struct tcp_hdr *tcp_hdr = NULL;
+
+	parse_ethernet(m, &l4_proto, &l4_hdr);
+	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
+			switch (hdr->csum_offset) {
+			case (offsetof(struct tcp_hdr, cksum)):
+				if (l4_proto == IPPROTO_TCP)
+					m->ol_flags |= PKT_TX_TCP_CKSUM;
+				break;
+			case (offsetof(struct udp_hdr, dgram_cksum)):
+				if (l4_proto == IPPROTO_UDP)
+					m->ol_flags |= PKT_TX_UDP_CKSUM;
+				break;
+			case (offsetof(struct sctp_hdr, cksum)):
+				if (l4_proto == IPPROTO_SCTP)
+					m->ol_flags |= PKT_TX_SCTP_CKSUM;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			tcp_hdr = (struct tcp_hdr *)l4_hdr;
+			m->ol_flags |= PKT_TX_TCP_SEG;
+			m->tso_segsz = hdr->gso_size;
+			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+			break;
+		default:
+			RTE_LOG(WARNING, VHOST_DATA,
+				"unsupported gso type %u.\n", hdr->gso_type);
+			break;
+		}
+	}
+}
+
 uint16_t
 rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -576,11 +672,13 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
 	uint64_t vb_addr = 0;
+	uint64_t vb_net_hdr_addr = 0;
 	uint32_t head[MAX_PKT_BURST];
 	uint32_t used_idx;
 	uint32_t i;
 	uint16_t free_entries, entry_success = 0;
 	uint16_t avail_idx;
+	struct virtio_net_hdr *hdr = NULL;
 
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
 		RTE_LOG(ERR, VHOST_DATA,
@@ -632,6 +730,9 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 
 		desc = &vq->desc[head[entry_success]];
 
+		vb_net_hdr_addr = gpa_to_vva(dev, desc->addr);
+		hdr = (struct virtio_net_hdr *)((uintptr_t)vb_net_hdr_addr);
+
 		/* Discard first buffer as it is the virtio header */
 		if (desc->flags & VRING_DESC_F_NEXT) {
 			desc = &vq->desc[desc->next];
@@ -770,6 +871,8 @@  rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 			break;
 
 		m->nb_segs = seg_num;
+		if ((hdr->flags != 0) || (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE))
+			vhost_dequeue_offload(hdr, m);
 
 		pkts[entry_success] = m;
 		vq->last_used_idx++;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 14278de..81bd309 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -77,7 +77,11 @@  static struct virtio_net_config_ll *ll_root;
 				(VHOST_SUPPORTS_MQ)            | \
 				(1ULL << VIRTIO_F_VERSION_1)   | \
 				(1ULL << VHOST_F_LOG_ALL)      | \
-				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
+				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+				(1ULL << VIRTIO_NET_F_CSUM))
+
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;