[dpdk-dev,v3,8/8] examples/vhost:support TX offload in vhost sample

Message ID 1446634456-413-9-git-send-email-jijiang.liu@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jijiang Liu Nov. 4, 2015, 10:54 a.m. UTC
  Change the vhost sample to support and test TX offload.

Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 examples/vhost/main.c |  128 ++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 111 insertions(+), 17 deletions(-)
  

Comments

Yuanhan Liu Nov. 9, 2015, 4:17 a.m. UTC | #1
On Wed, Nov 04, 2015 at 06:54:16PM +0800, Jijiang Liu wrote:
> Change the vhost sample to support and test TX offload.
> 
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> ---
>  examples/vhost/main.c |  128 ++++++++++++++++++++++++++++++++++++++++++-------
>  1 files changed, 111 insertions(+), 17 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index 9eac2d0..06e1e8b 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -50,6 +50,10 @@
>  #include <rte_string_fns.h>
>  #include <rte_malloc.h>
>  #include <rte_virtio_net.h>
> +#include <rte_tcp.h>
> +#include <rte_ip.h>
> +#include <rte_udp.h>
> +#include <rte_sctp.h>
>  
>  #include "main.h"
>  
> @@ -140,6 +144,8 @@
>  
>  #define MBUF_EXT_MEM(mb)   (rte_mbuf_from_indirect(mb) != (mb))
>  
> +#define VIRTIO_TX_CKSUM_OFFLOAD_MASK (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)
> +
>  /* mask of enabled ports */
>  static uint32_t enabled_port_mask = 0;
>  
> @@ -197,6 +203,13 @@ typedef enum {
>  static uint32_t enable_stats = 0;
>  /* Enable retries on RX. */
>  static uint32_t enable_retry = 1;
> +
> +/* Disable TX checksum offload */
      ^^^^^^^
Did you mean "Enable"?

> +static uint32_t enable_tx_csum;
> +
> +/* Disable TSO offload */
> +static uint32_t enable_tso;

Actually, I'd like to see TSO/CSUM offloading enabled by default:
they are so common, and they are enabled by default in a lot of places,
e.g. the kernel and QEMU. There is no reason to disable them here.

> +
>  /* Specify timeout (in useconds) between retries on RX. */
>  static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
>  /* Specify the number of retries on RX. */
> @@ -292,20 +305,6 @@ struct vlan_ethhdr {
>  	__be16          h_vlan_encapsulated_proto;
>  };
>  
> -/* IPv4 Header */
> -struct ipv4_hdr {
> -	uint8_t  version_ihl;		/**< version and header length */
> -	uint8_t  type_of_service;	/**< type of service */
> -	uint16_t total_length;		/**< length of packet */
> -	uint16_t packet_id;		/**< packet ID */
> -	uint16_t fragment_offset;	/**< fragmentation offset */
> -	uint8_t  time_to_live;		/**< time to live */
> -	uint8_t  next_proto_id;		/**< protocol ID */
> -	uint16_t hdr_checksum;		/**< header checksum */
> -	uint32_t src_addr;		/**< source address */
> -	uint32_t dst_addr;		/**< destination address */
> -} __attribute__((__packed__));


Minor nit: this is a cleanup that has nothing to do with this patch
(which demonstrates TSO/CSUM). It belongs in a separate patch.

> -
>  /* Header lengths. */
>  #define VLAN_HLEN       4
>  #define VLAN_ETH_HLEN   18
> @@ -441,6 +440,14 @@ port_init(uint8_t port)
>  
>  	if (port >= rte_eth_dev_count()) return -1;
>  
> +	if (enable_tx_csum == 0)
> +		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
> +
> +	if (enable_tso == 0) {
> +		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
> +		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
> +	}
> +
>  	rx_rings = (uint16_t)dev_info.max_rx_queues;
>  	/* Configure ethernet device. */
>  	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
> @@ -576,7 +583,9 @@ us_vhost_usage(const char *prgname)
>  	"		--rx-desc-num [0-N]: the number of descriptors on rx, "
>  			"used only when zero copy is enabled.\n"
>  	"		--tx-desc-num [0-N]: the number of descriptors on tx, "
> -			"used only when zero copy is enabled.\n",
> +			"used only when zero copy is enabled.\n"
> +	"		--tx-csum [0|1] disable/enable TX checksum offload.\n"
> +	"		--tso [0|1] disable/enable TCP segement offload.\n",
>  	       prgname);
>  }
>  
> @@ -602,6 +611,8 @@ us_vhost_parse_args(int argc, char **argv)
>  		{"zero-copy", required_argument, NULL, 0},
>  		{"rx-desc-num", required_argument, NULL, 0},
>  		{"tx-desc-num", required_argument, NULL, 0},
> +		{"tx-csum", required_argument, NULL, 0},
> +		{"tso", required_argument, NULL, 0},
>  		{NULL, 0, 0, 0},
>  	};
>  
> @@ -656,6 +667,28 @@ us_vhost_parse_args(int argc, char **argv)
>  				}
>  			}
>  
> +			/* Enable/disable TX checksum offload. */
> +			if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
> +				ret = parse_num_opt(optarg, 1);
> +				if (ret == -1) {
> +					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
> +					us_vhost_usage(prgname);
> +					return -1;
> +				} else
> +					enable_tx_csum = ret;
> +			}
> +
> +			/* Enable/disable TSO offload. */
> +			if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
> +				ret = parse_num_opt(optarg, 1);
> +				if (ret == -1) {
> +					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
> +					us_vhost_usage(prgname);
> +					return -1;
> +				} else
> +					enable_tso = ret;
> +			}
> +
>  			/* Specify the retries delay time (in useconds) on RX. */
>  			if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
>  				ret = parse_num_opt(optarg, INT32_MAX);
> @@ -1114,6 +1147,63 @@ find_local_dest(struct virtio_net *dev, struct rte_mbuf *m,
>  	return 0;
>  }
>  
> +static uint16_t
> +get_psd_sum(void *l3_hdr, uint64_t ol_flags)
> +{
> +	if (ol_flags & PKT_TX_IPV4)
> +		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
> +	else /* assume ethertype == ETHER_TYPE_IPv6 */
> +		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
> +}
> +
> +static void virtio_tx_offload(struct rte_mbuf *m)
> +{
> +	void *l3_hdr;
> +	struct ipv4_hdr *ipv4_hdr = NULL;
> +	struct tcp_hdr *tcp_hdr = NULL;
> +	struct udp_hdr *udp_hdr = NULL;
> +	struct sctp_hdr *sctp_hdr = NULL;
> +	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
> +
> +	l3_hdr = (char *)eth_hdr + m->l2_len;
> +
> +	if (m->ol_flags & PKT_TX_IPV4) {
> +		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
> +		if (m->ol_flags & PKT_TX_IP_CKSUM)
> +			ipv4_hdr->hdr_checksum = 0;
> +	}
> +
> +	if (m->ol_flags & PKT_TX_L4_MASK) {
> +		switch (m->ol_flags & PKT_TX_L4_MASK) {
> +		case PKT_TX_TCP_CKSUM:
> +			tcp_hdr = (struct tcp_hdr *)
> +					((char *)l3_hdr + m->l3_len);
> +			tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);

I'm wondering whether that's necessary here (even for data going through
the NIC). AFAIK, the kernel sending the data calculates the pseudo-header
checksum itself.

(I may be wrong; a simple validation could prove that) 

	--yliu
  
Jijiang Liu Nov. 9, 2015, 8:17 a.m. UTC | #2
> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Monday, November 09, 2015 12:17 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 8/8] examples/vhost:support TX offload in
> vhost sample
> 
> On Wed, Nov 04, 2015 at 06:54:16PM +0800, Jijiang Liu wrote:
> > Change the vhost sample to support and test TX offload.
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > ---
> >  examples/vhost/main.c |  128
> > ++++++++++++++++++++++++++++++++++++++++++-------
> >  1 files changed, 111 insertions(+), 17 deletions(-)
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > 9eac2d0..06e1e8b 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -50,6 +50,10 @@
> >  #include <rte_string_fns.h>
> >  #include <rte_malloc.h>
> >  #include <rte_virtio_net.h>
> > +#include <rte_tcp.h>
> > +#include <rte_ip.h>
> > +#include <rte_udp.h>
> > +#include <rte_sctp.h>
> >
> >  #include "main.h"
> >
> > @@ -140,6 +144,8 @@
> >
> Actually, I'd like to see TSO/CSUM offloading is enabled by default:
> they are so common, and they are enabled in a lot places by default, say,
> kernel, qemu. There is no reason to make it "disable" here.
> 
This configuration applies only to the vhost sample; TSO/CSUM is enabled by default in the library layer.

If users want to use it in the vhost sample, they can just change the configuration.
  
Jijiang Liu Nov. 9, 2015, 8:18 a.m. UTC | #3
> >
> > -/* IPv4 Header */
> > -struct ipv4_hdr {
> > -	uint8_t  version_ihl;		/**< version and header length */
> > -	uint8_t  type_of_service;	/**< type of service */
> > -	uint16_t total_length;		/**< length of packet */
> > -	uint16_t packet_id;		/**< packet ID */
> > -	uint16_t fragment_offset;	/**< fragmentation offset */
> > -	uint8_t  time_to_live;		/**< time to live */
> > -	uint8_t  next_proto_id;		/**< protocol ID */
> > -	uint16_t hdr_checksum;		/**< header checksum */
> > -	uint32_t src_addr;		/**< source address */
> > -	uint32_t dst_addr;		/**< destination address */
> > -} __attribute__((__packed__));
> 
> 
> Minor nit: it's a cleanup, having nothing to do with this patch (to
> demonstrate TSO/CSUM). It belongs to another patch.
> 
Ok, it could be a separate patch.
  
Yuanhan Liu Nov. 9, 2015, 8:51 a.m. UTC | #4
On Mon, Nov 09, 2015 at 08:17:24AM +0000, Liu, Jijiang wrote:
> 
> 
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > Sent: Monday, November 09, 2015 12:17 PM
> > To: Liu, Jijiang
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 8/8] examples/vhost:support TX offload in
> > vhost sample
> > 
> > On Wed, Nov 04, 2015 at 06:54:16PM +0800, Jijiang Liu wrote:
> > > Change the vhost sample to support and test TX offload.
> > >
> > > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > > ---
> > >  examples/vhost/main.c |  128
> > > ++++++++++++++++++++++++++++++++++++++++++-------
> > >  1 files changed, 111 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > > 9eac2d0..06e1e8b 100644
> > > --- a/examples/vhost/main.c
> > > +++ b/examples/vhost/main.c
> > > @@ -50,6 +50,10 @@
> > >  #include <rte_string_fns.h>
> > >  #include <rte_malloc.h>
> > >  #include <rte_virtio_net.h>
> > > +#include <rte_tcp.h>
> > > +#include <rte_ip.h>
> > > +#include <rte_udp.h>
> > > +#include <rte_sctp.h>
> > >
> > >  #include "main.h"
> > >
> > > @@ -140,6 +144,8 @@
> > >
> > Actually, I'd like to see TSO/CSUM offloading is enabled by default:
> > they are so common, and they are enabled in a lot places by default, say,
> > kernel, qemu. There is no reason to make it "disable" here.
> > 
> This is configuration  only in the vhost sample,  but TSO/CSUM is enabled by default in lib leyer.
> 
> If user want to use it in vhost sample, just change the configuration.

So, why do you want to disable it by default then?

	--yliu
  
Jijiang Liu Nov. 11, 2015, 6:47 a.m. UTC | #5
> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Monday, November 09, 2015 12:17 PM
> To: Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 8/8] examples/vhost:support TX offload in
> vhost sample
> 
> On Wed, Nov 04, 2015 at 06:54:16PM +0800, Jijiang Liu wrote:
> > Change the vhost sample to support and test TX offload.
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > ---
> >  examples/vhost/main.c |  128
> > ++++++++++++++++++++++++++++++++++++++++++-------
> >  1 files changed, 111 insertions(+), 17 deletions(-)
> >
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > 9eac2d0..06e1e8b 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -50,6 +50,10 @@
> >  #include <rte_string_fns.h>
> >  #include <rte_malloc.h>
> >  #include <rte_virtio_net.h>
> > +#include <rte_tcp.h>
> > +#include <rte_ip.h>
> > +#include <rte_udp.h>
> > +#include <rte_sctp.h>
> >
> >  #include "main.h"
> >
> > @@ -140,6 +144,8 @@
> >
> >  #define MBUF_EXT_MEM(mb)   (rte_mbuf_from_indirect(mb) != (mb))
> >
> > +#define VIRTIO_TX_CKSUM_OFFLOAD_MASK (PKT_TX_IP_CKSUM |
> > +PKT_TX_L4_MASK)
> > +
> >  /* mask of enabled ports */
> >  static uint32_t enabled_port_mask = 0;
> >
> > @@ -197,6 +203,13 @@ typedef enum {
> >  static uint32_t enable_stats = 0;
> >  /* Enable retries on RX. */
> >  static uint32_t enable_retry = 1;
> > +
> > +/* Disable TX checksum offload */
>       ^^^^^^^
> You meant to "Enable"?
> 
> > +static uint32_t enable_tx_csum;
> > +
> > +/* Disable TSO offload */
> > +static uint32_t enable_tso;
> 
> Actually, I'd like to see TSO/CSUM offloading is enabled by default:
> they are so common, and they are enabled in a lot places by default, say,
> kernel, qemu. There is no reason to make it "disable" here.
> 
> > +
> >  /* Specify timeout (in useconds) between retries on RX. */  static
> > uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
> >  /* Specify the number of retries on RX. */ @@ -292,20 +305,6 @@
> > struct vlan_ethhdr {
> >  	__be16          h_vlan_encapsulated_proto;
> >  };
> >
> > -/* IPv4 Header */
> > -struct ipv4_hdr {
> > -	uint8_t  version_ihl;		/**< version and header length */
> > -	uint8_t  type_of_service;	/**< type of service */
> > -	uint16_t total_length;		/**< length of packet */
> > -	uint16_t packet_id;		/**< packet ID */
> > -	uint16_t fragment_offset;	/**< fragmentation offset */
> > -	uint8_t  time_to_live;		/**< time to live */
> > -	uint8_t  next_proto_id;		/**< protocol ID */
> > -	uint16_t hdr_checksum;		/**< header checksum */
> > -	uint32_t src_addr;		/**< source address */
> > -	uint32_t dst_addr;		/**< destination address */
> > -} __attribute__((__packed__));
> 
> 
> Minor nit: it's a cleanup, having nothing to do with this patch (to
> demonstrate TSO/CSUM). It belongs to another patch.
> 
> > -
> >  /* Header lengths. */
> >  #define VLAN_HLEN       4
> >  #define VLAN_ETH_HLEN   18
> > @@ -441,6 +440,14 @@ port_init(uint8_t port)
> >
> >  	if (port >= rte_eth_dev_count()) return -1;
> >
> > +	if (enable_tx_csum == 0)
> > +		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
> > +
> > +	if (enable_tso == 0) {
> > +		rte_vhost_feature_disable(1ULL <<
> VIRTIO_NET_F_HOST_TSO4);
> > +		rte_vhost_feature_disable(1ULL <<
> VIRTIO_NET_F_HOST_TSO6);
> > +	}
> > +
> >  	rx_rings = (uint16_t)dev_info.max_rx_queues;
> >  	/* Configure ethernet device. */
> >  	retval = rte_eth_dev_configure(port, rx_rings, tx_rings,
> > &port_conf); @@ -576,7 +583,9 @@ us_vhost_usage(const char
> *prgname)
> >  	"		--rx-desc-num [0-N]: the number of descriptors on rx,
> "
> >  			"used only when zero copy is enabled.\n"
> >  	"		--tx-desc-num [0-N]: the number of descriptors on tx,
> "
> > -			"used only when zero copy is enabled.\n",
> > +			"used only when zero copy is enabled.\n"
> > +	"		--tx-csum [0|1] disable/enable TX checksum
> offload.\n"
> > +	"		--tso [0|1] disable/enable TCP segement offload.\n",
> >  	       prgname);
> >  }
> >
> > @@ -602,6 +611,8 @@ us_vhost_parse_args(int argc, char **argv)
> >  		{"zero-copy", required_argument, NULL, 0},
> >  		{"rx-desc-num", required_argument, NULL, 0},
> >  		{"tx-desc-num", required_argument, NULL, 0},
> > +		{"tx-csum", required_argument, NULL, 0},
> > +		{"tso", required_argument, NULL, 0},
> >  		{NULL, 0, 0, 0},
> >  	};
> >
> > @@ -656,6 +667,28 @@ us_vhost_parse_args(int argc, char **argv)
> >  				}
> >  			}
> >
> > +			/* Enable/disable TX checksum offload. */
> > +			if (!strncmp(long_option[option_index].name, "tx-
> csum", MAX_LONG_OPT_SZ)) {
> > +				ret = parse_num_opt(optarg, 1);
> > +				if (ret == -1) {
> > +					RTE_LOG(INFO, VHOST_CONFIG,
> "Invalid argument for tx-csum [0|1]\n");
> > +					us_vhost_usage(prgname);
> > +					return -1;
> > +				} else
> > +					enable_tx_csum = ret;
> > +			}
> > +
> > +			/* Enable/disable TSO offload. */
> > +			if (!strncmp(long_option[option_index].name, "tso",
> MAX_LONG_OPT_SZ)) {
> > +				ret = parse_num_opt(optarg, 1);
> > +				if (ret == -1) {
> > +					RTE_LOG(INFO, VHOST_CONFIG,
> "Invalid argument for tso [0|1]\n");
> > +					us_vhost_usage(prgname);
> > +					return -1;
> > +				} else
> > +					enable_tso = ret;
> > +			}
> > +
> >  			/* Specify the retries delay time (in useconds) on RX.
> */
> >  			if (!strncmp(long_option[option_index].name, "rx-
> retry-delay", MAX_LONG_OPT_SZ)) {
> >  				ret = parse_num_opt(optarg, INT32_MAX);
> @@ -1114,6 +1147,63 @@
> > find_local_dest(struct virtio_net *dev, struct rte_mbuf *m,
> >  	return 0;
> >  }
> >
> > +static uint16_t
> > +get_psd_sum(void *l3_hdr, uint64_t ol_flags) {
> > +	if (ol_flags & PKT_TX_IPV4)
> > +		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
> > +	else /* assume ethertype == ETHER_TYPE_IPv6 */
> > +		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); }
> > +
> > +static void virtio_tx_offload(struct rte_mbuf *m) {
> > +	void *l3_hdr;
> > +	struct ipv4_hdr *ipv4_hdr = NULL;
> > +	struct tcp_hdr *tcp_hdr = NULL;
> > +	struct udp_hdr *udp_hdr = NULL;
> > +	struct sctp_hdr *sctp_hdr = NULL;
> > +	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr
> *);
> > +
> > +	l3_hdr = (char *)eth_hdr + m->l2_len;
> > +
> > +	if (m->ol_flags & PKT_TX_IPV4) {
> > +		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
> > +		if (m->ol_flags & PKT_TX_IP_CKSUM)
> > +			ipv4_hdr->hdr_checksum = 0;
> > +	}
> > +
> > +	if (m->ol_flags & PKT_TX_L4_MASK) {
> > +		switch (m->ol_flags & PKT_TX_L4_MASK) {
> > +		case PKT_TX_TCP_CKSUM:
> > +			tcp_hdr = (struct tcp_hdr *)
> > +					((char *)l3_hdr + m->l3_len);
> > +			tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
> 
> I'm wondering that's necessary here (even for the data going through nic).
> AFAIK, the kernel sending the data will calculate pseudo checksum.
> 
> (I may be wrong; a simple validation could prove that)
>
After testing in combination with TSO, I found that these fields need to be set.
> 	--yliu
  

Patch

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9eac2d0..06e1e8b 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -50,6 +50,10 @@ 
 #include <rte_string_fns.h>
 #include <rte_malloc.h>
 #include <rte_virtio_net.h>
+#include <rte_tcp.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
 
 #include "main.h"
 
@@ -140,6 +144,8 @@ 
 
 #define MBUF_EXT_MEM(mb)   (rte_mbuf_from_indirect(mb) != (mb))
 
+#define VIRTIO_TX_CKSUM_OFFLOAD_MASK (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)
+
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;
 
@@ -197,6 +203,13 @@  typedef enum {
 static uint32_t enable_stats = 0;
 /* Enable retries on RX. */
 static uint32_t enable_retry = 1;
+
+/* Enable TX checksum offload (disabled by default) */
+static uint32_t enable_tx_csum;
+
+/* Enable TSO offload (disabled by default) */
+static uint32_t enable_tso;
+
 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
 /* Specify the number of retries on RX. */
@@ -292,20 +305,6 @@  struct vlan_ethhdr {
 	__be16          h_vlan_encapsulated_proto;
 };
 
-/* IPv4 Header */
-struct ipv4_hdr {
-	uint8_t  version_ihl;		/**< version and header length */
-	uint8_t  type_of_service;	/**< type of service */
-	uint16_t total_length;		/**< length of packet */
-	uint16_t packet_id;		/**< packet ID */
-	uint16_t fragment_offset;	/**< fragmentation offset */
-	uint8_t  time_to_live;		/**< time to live */
-	uint8_t  next_proto_id;		/**< protocol ID */
-	uint16_t hdr_checksum;		/**< header checksum */
-	uint32_t src_addr;		/**< source address */
-	uint32_t dst_addr;		/**< destination address */
-} __attribute__((__packed__));
-
 /* Header lengths. */
 #define VLAN_HLEN       4
 #define VLAN_ETH_HLEN   18
@@ -441,6 +440,14 @@  port_init(uint8_t port)
 
 	if (port >= rte_eth_dev_count()) return -1;
 
+	if (enable_tx_csum == 0)
+		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
+
+	if (enable_tso == 0) {
+		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
+		rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
+	}
+
 	rx_rings = (uint16_t)dev_info.max_rx_queues;
 	/* Configure ethernet device. */
 	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
@@ -576,7 +583,9 @@  us_vhost_usage(const char *prgname)
 	"		--rx-desc-num [0-N]: the number of descriptors on rx, "
 			"used only when zero copy is enabled.\n"
 	"		--tx-desc-num [0-N]: the number of descriptors on tx, "
-			"used only when zero copy is enabled.\n",
+			"used only when zero copy is enabled.\n"
+	"		--tx-csum [0|1] disable/enable TX checksum offload.\n"
+	"		--tso [0|1] disable/enable TCP segement offload.\n",
 	       prgname);
 }
 
@@ -602,6 +611,8 @@  us_vhost_parse_args(int argc, char **argv)
 		{"zero-copy", required_argument, NULL, 0},
 		{"rx-desc-num", required_argument, NULL, 0},
 		{"tx-desc-num", required_argument, NULL, 0},
+		{"tx-csum", required_argument, NULL, 0},
+		{"tso", required_argument, NULL, 0},
 		{NULL, 0, 0, 0},
 	};
 
@@ -656,6 +667,28 @@  us_vhost_parse_args(int argc, char **argv)
 				}
 			}
 
+			/* Enable/disable TX checksum offload. */
+			if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
+				ret = parse_num_opt(optarg, 1);
+				if (ret == -1) {
+					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
+					us_vhost_usage(prgname);
+					return -1;
+				} else
+					enable_tx_csum = ret;
+			}
+
+			/* Enable/disable TSO offload. */
+			if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
+				ret = parse_num_opt(optarg, 1);
+				if (ret == -1) {
+					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
+					us_vhost_usage(prgname);
+					return -1;
+				} else
+					enable_tso = ret;
+			}
+
 			/* Specify the retries delay time (in useconds) on RX. */
 			if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
 				ret = parse_num_opt(optarg, INT32_MAX);
@@ -1114,6 +1147,63 @@  find_local_dest(struct virtio_net *dev, struct rte_mbuf *m,
 	return 0;
 }
 
+static uint16_t
+get_psd_sum(void *l3_hdr, uint64_t ol_flags)
+{
+	if (ol_flags & PKT_TX_IPV4)
+		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
+	else /* assume ethertype == ETHER_TYPE_IPv6 */
+		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
+}
+
+static void virtio_tx_offload(struct rte_mbuf *m)
+{
+	void *l3_hdr;
+	struct ipv4_hdr *ipv4_hdr = NULL;
+	struct tcp_hdr *tcp_hdr = NULL;
+	struct udp_hdr *udp_hdr = NULL;
+	struct sctp_hdr *sctp_hdr = NULL;
+	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	if (m->ol_flags & PKT_TX_IPV4) {
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		if (m->ol_flags & PKT_TX_IP_CKSUM)
+			ipv4_hdr->hdr_checksum = 0;
+	}
+
+	if (m->ol_flags & PKT_TX_L4_MASK) {
+		switch (m->ol_flags & PKT_TX_L4_MASK) {
+		case PKT_TX_TCP_CKSUM:
+			tcp_hdr = (struct tcp_hdr *)
+					((char *)l3_hdr + m->l3_len);
+			tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
+			break;
+		case PKT_TX_UDP_CKSUM:
+			udp_hdr = (struct udp_hdr *)
+					((char *)l3_hdr + m->l3_len);
+			udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, m->ol_flags);
+			break;
+		case PKT_TX_SCTP_CKSUM:
+			sctp_hdr = (struct sctp_hdr *)
+					((char *)l3_hdr + m->l3_len);
+			sctp_hdr->cksum = 0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (m->tso_segsz != 0) {
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
+		m->ol_flags |= PKT_TX_IP_CKSUM;
+		ipv4_hdr->hdr_checksum = 0;
+		tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
+	}
+}
+
 /*
  * This function routes the TX packet to the correct interface. This may be a local device
  * or the physical port.
@@ -1156,7 +1246,7 @@  virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 			(vh->vlan_tci != vlan_tag_be))
 			vh->vlan_tci = vlan_tag_be;
 	} else {
-		m->ol_flags = PKT_TX_VLAN_PKT;
+		m->ol_flags |= PKT_TX_VLAN_PKT;
 
 		/*
 		 * Find the right seg to adjust the data len when offset is
@@ -1180,6 +1270,10 @@  virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
 		m->vlan_tci = vlan_tag;
 	}
 
+	if ((m->ol_flags & VIRTIO_TX_CKSUM_OFFLOAD_MASK) ||
+		(m->ol_flags & PKT_TX_TCP_SEG))
+		virtio_tx_offload(m);
+
 	tx_q->m_table[len] = m;
 	len++;
 	if (enable_stats) {
@@ -1841,7 +1935,7 @@  virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
 		mbuf->buf_physaddr = m->buf_physaddr;
 		mbuf->buf_addr = m->buf_addr;
 	}
-	mbuf->ol_flags = PKT_TX_VLAN_PKT;
+	mbuf->ol_flags |= PKT_TX_VLAN_PKT;
 	mbuf->vlan_tci = vlan_tag;
 	mbuf->l2_len = sizeof(struct ether_hdr);
 	mbuf->l3_len = sizeof(struct ipv4_hdr);