[dpdk-dev,v3,3/3] app/testpmd:change tx_checksum command and csum forwarding engine

Message ID 1418173403-30202-4-git-send-email-jijiang.liu@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jijiang Liu Dec. 10, 2014, 1:03 a.m. UTC
This patch enhances the tx_checksum command and reworks the csum forwarding engine to match the changed command.
The main changes to the tx_checksum command are listed below:
 
1. add "tx_checksum set tunnel (hw|sw|none) (port-id)" command
 
2. add "tx_checksum set outer-ip (hw|sw) (port-id)" command
 
3. remove the "vxlan" option from the "tx_checksum set (ip|udp|tcp|sctp|vxlan) (hw|sw) (port-id)" command
 
Moreover, replace the TESTPMD_TX_OFFLOAD_VXLAN_CKSUM flag with the TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM flag, and add the TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM and TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM flags.


Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 app/test-pmd/cmdline.c  |  209 ++++++++++++++++++++++++++++++++++++++++++++---
 app/test-pmd/csumonly.c |   38 ++++++---
 app/test-pmd/testpmd.h  |   14 +++-
 3 files changed, 234 insertions(+), 27 deletions(-)
  

Comments

Olivier Matz Dec. 11, 2014, 10:52 a.m. UTC | #1
Hi Jijiang,

Some more comments, in addition to the one I've made in the cover
letter. Reference link for patchwork readers:
http://dpdk.org/ml/archives/dev/2014-December/009886.html

On 12/10/2014 02:03 AM, Jijiang Liu wrote:
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -316,16 +316,30 @@ static void cmd_help_long_parsed(void *parsed_result,
>   			"    Disable hardware insertion of a VLAN header in"
>   			" packets sent on a port.\n\n"
>
> -			"tx_cksum set (ip|udp|tcp|sctp|vxlan) (hw|sw) (port_id)\n"
> +			"tx_cksum set (ip|udp|tcp|sctp) (hw|sw) (port_id)\n"
>   			"    Select hardware or software calculation of the"
>   			" checksum with when transmitting a packet using the"
>   			" csum forward engine.\n"
> -			"    ip|udp|tcp|sctp always concern the inner layer.\n"
> -			"    vxlan concerns the outer IP and UDP layer (in"
> -			" case the packet is recognized as a vxlan packet by"
> -			" the forward engine)\n"
> +			"    In the case of tunneling packet, ip|udp|tcp|sctp"
> +			" always concern the inner layer.\n\n"
> +
> +			"tx_cksum set tunnel (hw|sw|none) (port_id)\n"
> +			" Select hardware or software calculation of the"
> +			" checksum with when transmitting a tunneling packet"
> +			" using the csum forward engine.\n"
> +			" The none option means treat tunneling packet as ordinary"
> +			" packet when using the csum forward engine\n."
> +			"    Tunneling packet concerns the outer IP, inner IP"
> +			" and inner L4\n"
>   			"    Please check the NIC datasheet for HW limits.\n\n"
>
> +			"tx_cksum set (outer-ip) (hw|sw) (port_id)\n"
> +			"    Select hardware or software calculation of the"
> +			" checksum with when transmitting a packet using the"
> +			" csum forward engine.\n"
> +			"    outer-ip always concern the outer layer of"
> +			" tunneling packet.\n\n"
> +
>   			"tx_checksum show (port_id)\n"
>   			"    Display tx checksum offload configuration\n\n"
>

I am not sure we need two different commands for tx_cksum set (outer-ip) and
tx_cksum set (ip|udp|tcp|sctp). As the syntax is exactly the same, having
only one command may result in less code.


> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -256,17 +256,16 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t outer_ethertype,
>   	struct udp_hdr *udp_hdr;
>   	uint64_t ol_flags = 0;
>
> -	if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> -		ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
> -
>   	if (outer_ethertype == _htons(ETHER_TYPE_IPv4)) {
>   		ipv4_hdr->hdr_checksum = 0;
>
> -		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> +		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)
>   			ol_flags |= PKT_TX_OUTER_IP_CKSUM;
> -		else
> +		else {
>   			ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> -	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> +			ol_flags |= PKT_TX_OUTER_IPV4;
> +		}
> +	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
>   		ol_flags |= PKT_TX_OUTER_IPV6;
>
>   	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + outer_l3_len);
> @@ -300,11 +299,14 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t outer_ethertype,
>    *   Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 /
>    *           UDP|TCP|SCTP
>    *
> - * The testpmd command line for this forward engine sets the flags
> + * These testpmd command lines for this forward engine sets the flags
>    * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control
> - * wether a checksum must be calculated in software or in hardware. The
> - * IP, UDP, TCP and SCTP flags always concern the inner layer.  The
> - * VxLAN flag concerns the outer IP (if packet is recognized as a vxlan packet).
> + * wether a checksum must be calculated in software or in hardware.
> + * In the case of tunneling packet, the IP, UDP, TCP and SCTP flags
> + * always concern the inner layer; the outer IP flag always concern
> + * the outer layer; the tunnel flag is used to tell the NIC that it
> + * is a tunneing packet, want hardware offload for outer layer,
> + * or inner layer, or both.

tunneing -> tunneling

"the tunnel flag is used to tell the NIC that it is a tunneing packet,
want hardware offload for outer layer, or inner layer, or both."

what does that mean?


>    */
>   static void
>   pkt_burst_checksum_forward(struct fwd_stream *fs)
> @@ -376,7 +378,9 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>   		l3_hdr = (char *)eth_hdr + l2_len;
>
>   		/* check if it's a supported tunnel (only vxlan for now) */
> -		if (l4_proto == IPPROTO_UDP) {
> +		if (((testpmd_ol_flags &
> +			TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM) == 0)
> +			&& (l4_proto == IPPROTO_UDP)) {
>   			udp_hdr = (struct udp_hdr *)((char *)l3_hdr + l3_len);
>
>   			/* check udp destination port, 4789 is the default
> @@ -386,17 +390,23 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>   				tunnel = 1;
>
>   			/* currently, this flag is set by i40e only if the
> -			 * packet is vxlan */
> +			 * packet is a tunneling packet */
>   			} else if (m->ol_flags & (PKT_RX_TUNNEL_IPV4_HDR |
>   					PKT_RX_TUNNEL_IPV6_HDR))
>   				tunnel = 1;
>
>   			if (tunnel == 1) {
> +
> +				if (testpmd_ol_flags
> +					& TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
> +					ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
> +
>   				outer_ethertype = ethertype;
>   				outer_l2_len = l2_len;
>   				outer_l3_len = l3_len;
>   				outer_l3_hdr = l3_hdr;
>
> +				/* currently, only VXLAN packet is supported */
>   				eth_hdr = (struct ether_hdr *)((char *)udp_hdr +
>   					sizeof(struct udp_hdr) +
>   					sizeof(struct vxlan_hdr));
> @@ -434,7 +444,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>   		/* step 4: fill the mbuf meta data (flags and header lengths) */
>
>   		if (tunnel == 1) {
> -			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) {
> +			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM) {
>   				m->outer_l2_len = outer_l2_len;
>   				m->outer_l3_len = outer_l3_len;
>   				m->l2_len = l4_tun_len + l2_len;
> @@ -505,7 +515,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>   					"m->l4_len=%d\n",
>   					m->l2_len, m->l3_len, m->l4_len);
>   			if ((tunnel == 1) &&
> -				(testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM))
> +				(testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM))
>   				printf("tx: m->outer_l2_len=%d m->outer_l3_len=%d\n",
>   					m->outer_l2_len, m->outer_l3_len);
>   			if (tso_segsz != 0)
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index f8b0740..09caa6a 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -125,10 +125,20 @@ struct fwd_stream {
>   #define TESTPMD_TX_OFFLOAD_TCP_CKSUM         0x0004
>   /** Offload SCTP checksum in csum forward engine */
>   #define TESTPMD_TX_OFFLOAD_SCTP_CKSUM        0x0008
> -/** Offload VxLAN checksum in csum forward engine */
> -#define TESTPMD_TX_OFFLOAD_VXLAN_CKSUM       0x0010
> +/** Offload tunneling packet checksum in csum forward engine */
> +#define TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM      0x0010
>   /** Insert VLAN header in forward engine */
>   #define TESTPMD_TX_OFFLOAD_INSERT_VLAN       0x0020
> +/**
> + * Offload outer-IP checksum in csum forward engine
> + * for tunneling packet
> + */
> +#define TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM    0x0040
> +/**
> + * For a tunneling packet, user requests HW offload for its outer
> + * layer checksum, and don't care is it a tunneled packet or not.
> + */
> +#define TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM  0x0080
>
>   /**
>    * The data structure associated with each port.
>

For now, I have not checked the implementation of the patch. I'm not
sure I understand the specification, so I cannot check whether the code
conforms to it.

How was this patch tested? I think a report similar to
http://dpdk.org/ml/archives/dev/2014-November/007991.html would help
to verify that it works (at least on one driver), and to understand
the test-pmd API and the different use cases.

Regards,
Olivier
  
Jijiang Liu Dec. 12, 2014, 4:06 a.m. UTC | #2
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Thursday, December 11, 2014 6:53 PM
> To: Liu, Jijiang; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 3/3] app/testpmd:change tx_checksum
> command and csum forwarding engine
> 
> Hi Jijiang,
> 
> Some more comments, in addition to the one I've made in the cover letter.
> Reference link for patchwork readers:
> http://dpdk.org/ml/archives/dev/2014-December/009886.html
> 
> On 12/10/2014 02:03 AM, Jijiang Liu wrote:
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -316,16 +316,30 @@ static void cmd_help_long_parsed(void
> *parsed_result,
> >   			"    Disable hardware insertion of a VLAN header in"
> >   			" packets sent on a port.\n\n"
> >
> > -			"tx_cksum set (ip|udp|tcp|sctp|vxlan) (hw|sw)
> (port_id)\n"
> > +			"tx_cksum set (ip|udp|tcp|sctp) (hw|sw) (port_id)\n"
> >   			"    Select hardware or software calculation of the"
> >   			" checksum with when transmitting a packet using the"
> >   			" csum forward engine.\n"
> > -			"    ip|udp|tcp|sctp always concern the inner layer.\n"
> > -			"    vxlan concerns the outer IP and UDP layer (in"
> > -			" case the packet is recognized as a vxlan packet by"
> > -			" the forward engine)\n"
> > +			"    In the case of tunneling packet, ip|udp|tcp|sctp"
> > +			" always concern the inner layer.\n\n"
> > +
> > +			"tx_cksum set tunnel (hw|sw|none) (port_id)\n"
> > +			" Select hardware or software calculation of the"
> > +			" checksum with when transmitting a tunneling packet"
> > +			" using the csum forward engine.\n"
> > +			" The none option means treat tunneling packet as
> ordinary"
> > +			" packet when using the csum forward engine\n."
> > +			"    Tunneling packet concerns the outer IP, inner IP"
> > +			" and inner L4\n"
> >   			"    Please check the NIC datasheet for HW limits.\n\n"
> >
> > +			"tx_cksum set (outer-ip) (hw|sw) (port_id)\n"
> > +			"    Select hardware or software calculation of the"
> > +			" checksum with when transmitting a packet using the"
> > +			" csum forward engine.\n"
> > +			"    outer-ip always concern the outer layer of"
> > +			" tunneling packet.\n\n"
> > +
> >   			"tx_checksum show (port_id)\n"
> >   			"    Display tx checksum offload configuration\n\n"
> >
> 
> not sure we need 2 different commands for tx_cksum set (outer-ip) and tx_cksum
> set (ip|udp|tcp|sctp). As the syntax is exactly the same, it may result in less code
> to have only one command.

I have explained in another mail why we have a separate command for the outer layer.
Do you agree with this?
 
> 
> > --- a/app/test-pmd/csumonly.c
> > +++ b/app/test-pmd/csumonly.c
> > @@ -256,17 +256,16 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t
> outer_ethertype,
> >   	struct udp_hdr *udp_hdr;
> >   	uint64_t ol_flags = 0;
> >
> > -	if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> > -		ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
> > -
> >   	if (outer_ethertype == _htons(ETHER_TYPE_IPv4)) {
> >   		ipv4_hdr->hdr_checksum = 0;
> >
> > -		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> > +		if (testpmd_ol_flags &
> TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)
> >   			ol_flags |= PKT_TX_OUTER_IP_CKSUM;
> > -		else
> > +		else {
> >   			ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
> > -	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
> > +			ol_flags |= PKT_TX_OUTER_IPV4;
> > +		}
> > +	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
> >   		ol_flags |= PKT_TX_OUTER_IPV6;
> >
> >   	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + outer_l3_len);
> > @@ -300,11 +299,14 @@ process_outer_cksums(void *outer_l3_hdr, uint16_t
> outer_ethertype,
> >    *   Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 /
> >    *           UDP|TCP|SCTP
> >    *
> > - * The testpmd command line for this forward engine sets the flags
> > + * These testpmd command lines for this forward engine sets the flags
> >    * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control
> > - * wether a checksum must be calculated in software or in hardware.
> > The
> > - * IP, UDP, TCP and SCTP flags always concern the inner layer.  The
> > - * VxLAN flag concerns the outer IP (if packet is recognized as a vxlan packet).
> > + * wether a checksum must be calculated in software or in hardware.
> > + * In the case of tunneling packet, the IP, UDP, TCP and SCTP flags
> > + * always concern the inner layer; the outer IP flag always concern
> > + * the outer layer; the tunnel flag is used to tell the NIC that it
> > + * is a tunneing packet, want hardware offload for outer layer,
> > + * or inner layer, or both.
> 
> tunneing -> tunneling
> 
> "the tunnel flag is used to tell the NIC that it is a tunneing packet, want hardware
> offload for outer layer, or inner layer, or both."
> 
> what does that mean?

Ok,
I will replace the above description with the following:
The tunnel flag is used to set/clear the flag that enables TX tunneling packet checksum hardware offload in the application.

> 
> 
> >    */
> >   static void
> >   pkt_burst_checksum_forward(struct fwd_stream *fs) @@ -376,7 +378,9
> > @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> >   		l3_hdr = (char *)eth_hdr + l2_len;
> >
> >   		/* check if it's a supported tunnel (only vxlan for now) */
> > -		if (l4_proto == IPPROTO_UDP) {
> > +		if (((testpmd_ol_flags &
> > +			TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM) == 0)
> > +			&& (l4_proto == IPPROTO_UDP)) {
> >   			udp_hdr = (struct udp_hdr *)((char *)l3_hdr + l3_len);
> >
> >   			/* check udp destination port, 4789 is the default @@ -
> 386,17
> > +390,23 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> >   				tunnel = 1;
> >
> >   			/* currently, this flag is set by i40e only if the
> > -			 * packet is vxlan */
> > +			 * packet is a tunneling packet */
> >   			} else if (m->ol_flags & (PKT_RX_TUNNEL_IPV4_HDR |
> >   					PKT_RX_TUNNEL_IPV6_HDR))
> >   				tunnel = 1;
> >
> >   			if (tunnel == 1) {
> > +
> > +				if (testpmd_ol_flags
> > +					&
> TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
> > +					ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
> > +
> >   				outer_ethertype = ethertype;
> >   				outer_l2_len = l2_len;
> >   				outer_l3_len = l3_len;
> >   				outer_l3_hdr = l3_hdr;
> >
> > +				/* currently, only VXLAN packet is supported */
> >   				eth_hdr = (struct ether_hdr *)((char *)udp_hdr +
> >   					sizeof(struct udp_hdr) +
> >   					sizeof(struct vxlan_hdr));
> > @@ -434,7 +444,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> >   		/* step 4: fill the mbuf meta data (flags and header lengths) */
> >
> >   		if (tunnel == 1) {
> > -			if (testpmd_ol_flags &
> TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) {
> > +			if (testpmd_ol_flags &
> TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM) {
> >   				m->outer_l2_len = outer_l2_len;
> >   				m->outer_l3_len = outer_l3_len;
> >   				m->l2_len = l4_tun_len + l2_len; @@ -505,7
> +515,7 @@
> > pkt_burst_checksum_forward(struct fwd_stream *fs)
> >   					"m->l4_len=%d\n",
> >   					m->l2_len, m->l3_len, m->l4_len);
> >   			if ((tunnel == 1) &&
> > -				(testpmd_ol_flags &
> TESTPMD_TX_OFFLOAD_VXLAN_CKSUM))
> > +				(testpmd_ol_flags &
> TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM))
> >   				printf("tx: m->outer_l2_len=%d m-
> >outer_l3_len=%d\n",
> >   					m->outer_l2_len, m->outer_l3_len);
> >   			if (tso_segsz != 0)
> > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> > f8b0740..09caa6a 100644
> > --- a/app/test-pmd/testpmd.h
> > +++ b/app/test-pmd/testpmd.h
> > @@ -125,10 +125,20 @@ struct fwd_stream {
> >   #define TESTPMD_TX_OFFLOAD_TCP_CKSUM         0x0004
> >   /** Offload SCTP checksum in csum forward engine */
> >   #define TESTPMD_TX_OFFLOAD_SCTP_CKSUM        0x0008
> > -/** Offload VxLAN checksum in csum forward engine */
> > -#define TESTPMD_TX_OFFLOAD_VXLAN_CKSUM       0x0010
> > +/** Offload tunneling packet checksum in csum forward engine */
> > +#define TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM      0x0010
> >   /** Insert VLAN header in forward engine */
> >   #define TESTPMD_TX_OFFLOAD_INSERT_VLAN       0x0020
> > +/**
> > + * Offload outer-IP checksum in csum forward engine
> > + * for tunneling packet
> > + */
> > +#define TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM    0x0040
> > +/**
> > + * For a tunneling packet, user requests HW offload for its outer
> > + * layer checksum, and don't care is it a tunneled packet or not.
> > + */
> > +#define TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM  0x0080
> >
> >   /**
> >    * The data structure associated with each port.
> >
> 
> For now, I did not check the implementation of the patch. I'm not sure I
> understand the specifications, so I cannot check if the code conforms to it.
> 
> How this patch is tested? 
Yes, it has been tested on i40e and ixgbe.

> I think a report similar to
> http://dpdk.org/ml/archives/dev/2014-November/007991.html would help to
> verify that it works (at least on one driver), and to understand the test-pmd API
> and the different use cases.
Ok, I will send a test report.

> Regards,
> Olivier
  

Patch

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index f79ea3e..9bfa9ef 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -316,16 +316,30 @@  static void cmd_help_long_parsed(void *parsed_result,
 			"    Disable hardware insertion of a VLAN header in"
 			" packets sent on a port.\n\n"
 
-			"tx_cksum set (ip|udp|tcp|sctp|vxlan) (hw|sw) (port_id)\n"
+			"tx_cksum set (ip|udp|tcp|sctp) (hw|sw) (port_id)\n"
 			"    Select hardware or software calculation of the"
 			" checksum with when transmitting a packet using the"
 			" csum forward engine.\n"
-			"    ip|udp|tcp|sctp always concern the inner layer.\n"
-			"    vxlan concerns the outer IP and UDP layer (in"
-			" case the packet is recognized as a vxlan packet by"
-			" the forward engine)\n"
+			"    In the case of tunneling packet, ip|udp|tcp|sctp"
+			" always concern the inner layer.\n\n"
+
+			"tx_cksum set tunnel (hw|sw|none) (port_id)\n"
+			" Select hardware or software calculation of the"
+			" checksum with when transmitting a tunneling packet"
+			" using the csum forward engine.\n"
+			" The none option means treat tunneling packet as ordinary"
+			" packet when using the csum forward engine\n."
+			"    Tunneling packet concerns the outer IP, inner IP"
+			" and inner L4\n"
 			"    Please check the NIC datasheet for HW limits.\n\n"
 
+			"tx_cksum set (outer-ip) (hw|sw) (port_id)\n"
+			"    Select hardware or software calculation of the"
+			" checksum with when transmitting a packet using the"
+			" csum forward engine.\n"
+			"    outer-ip always concern the outer layer of"
+			" tunneling packet.\n\n"
+
 			"tx_checksum show (port_id)\n"
 			"    Display tx checksum offload configuration\n\n"
 
@@ -2861,6 +2875,181 @@  cmdline_parse_inst_t cmd_tx_vlan_reset = {
 	},
 };
 
+/* ENABLE HARDWARE INSERTION OF CHECKSUM IN TX PACKETS FOR TUNNELING */
+struct cmd_tx_cksum_tunnel_result {
+	cmdline_fixed_string_t tx_cksum;
+	cmdline_fixed_string_t mode;
+	cmdline_fixed_string_t type;
+	cmdline_fixed_string_t hwsw;
+	uint8_t port_id;
+};
+
+static void
+cmd_tx_cksum_tunnel_parsed(void *parsed_result,
+		       __attribute__((unused)) struct cmdline *cl,
+		       __attribute__((unused)) void *data)
+{
+	struct cmd_tx_cksum_tunnel_result *res = parsed_result;
+	int hw = 0;
+	uint16_t ol_flags, mask = 0;
+
+	if (port_id_is_invalid(res->port_id)) {
+		printf("invalid port %d\n", res->port_id);
+		return;
+	}
+
+	if (!strcmp(res->mode, "set")) {
+
+		if (!strcmp(res->hwsw, "hw"))
+			hw = 1;
+		else if (!strcmp(res->hwsw, "none")) {
+			ports[res->port_id].tx_ol_flags &=
+				~(TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM
+				| TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM);
+			ports[res->port_id].tx_ol_flags |=
+				TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM;
+			return;
+		}
+
+		ports[res->port_id].tx_ol_flags &=
+				~TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM;
+		mask = TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM;
+
+		if (hw)
+			ports[res->port_id].tx_ol_flags |= mask;
+		else
+			ports[res->port_id].tx_ol_flags &= (~mask);
+	}
+
+	ol_flags = ports[res->port_id].tx_ol_flags;
+	printf("Tunnel checksum offload is %s\n",
+		(ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM) ? "hw" : "sw");
+}
+
+cmdline_parse_token_string_t cmd_tx_cksum_tunnel_tx_cksum =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_tunnel_result,
+				tx_cksum, "tx_checksum");
+cmdline_parse_token_string_t cmd_tx_cksum_tunnel_mode =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_tunnel_result,
+				mode, "set");
+cmdline_parse_token_string_t cmd_tx_cksum_tunnel_type =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_tunnel_result,
+				type, "tunnel");
+cmdline_parse_token_string_t cmd_tx_cksum_tunnel_hwsw =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_tunnel_result,
+				hwsw, "hw#sw#none");
+cmdline_parse_token_num_t cmd_tx_cksum_tunnel_portid =
+	TOKEN_NUM_INITIALIZER(struct cmd_tx_cksum_tunnel_result,
+				port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tx_cksum_tunnel_set = {
+	.f = cmd_tx_cksum_tunnel_parsed,
+	.data = NULL,
+	.help_str = "enable/disable hardware calculation of tunneling "
+		"packet checksum when using csum forward engine: tx_cksum set "
+		"tunnel hw|sw|none <port>",
+	.tokens = {
+		(void *)&cmd_tx_cksum_tunnel_tx_cksum,
+		(void *)&cmd_tx_cksum_tunnel_mode,
+		(void *)&cmd_tx_cksum_tunnel_type,
+		(void *)&cmd_tx_cksum_tunnel_hwsw,
+		(void *)&cmd_tx_cksum_tunnel_portid,
+		NULL,
+	},
+};
+
+/* ENABLE HARDWARE INSERTION OF OUTER CHECKSUM IN TX PACKETS FOR TUNNELING  */
+struct cmd_tx_cksum_outer_result {
+	cmdline_fixed_string_t tx_cksum;
+	cmdline_fixed_string_t mode;
+	cmdline_fixed_string_t proto;
+	cmdline_fixed_string_t hwsw;
+	uint8_t port_id;
+};
+
+static void
+cmd_tx_cksum_outer_parsed(void *parsed_result,
+		       __attribute__((unused)) struct cmdline *cl,
+		       __attribute__((unused)) void *data)
+{
+	struct cmd_tx_cksum_outer_result *res = parsed_result;
+	int hw = 0;
+	uint16_t ol_flags, mask = 0;
+	struct rte_eth_dev_info dev_info;
+
+	if (port_id_is_invalid(res->port_id)) {
+		printf("invalid port %d\n", res->port_id);
+		return;
+	}
+
+	if (!strcmp(res->mode, "set")) {
+
+		if (!strcmp(res->hwsw, "hw"))
+			hw = 1;
+
+		if (!strcmp(res->proto, "outer-ip")) {
+			if (ports[res->port_id].tx_ol_flags &
+				TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
+				mask = TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM;
+			else {
+				printf("Tunnel checksum flag must be"
+					" configured before enabling HW"
+					" outer IP checksum\n");
+				return;
+			}
+		}
+
+		if (hw)
+			ports[res->port_id].tx_ol_flags |= mask;
+		else
+			ports[res->port_id].tx_ol_flags &= (~mask);
+	}
+
+	ol_flags = ports[res->port_id].tx_ol_flags;
+	printf("Outer IP checksum offload is %s\n",
+		(ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) ? "hw" : "sw");
+
+	/* display warnings if configuration is not supported by the NIC */
+	rte_eth_dev_info_get(res->port_id, &dev_info);
+	if ((ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) &&
+		(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
+			== 0) {
+		printf("Warning: hardware Outer IP checksum enabled but not "
+			"supported by port %d\n", res->port_id);
+	}
+}
+
+cmdline_parse_token_string_t cmd_tx_cksum_outer_tx_cksum =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_outer_result,
+				tx_cksum, "tx_checksum");
+cmdline_parse_token_string_t cmd_tx_cksum_outer_mode =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_outer_result,
+				mode, "set");
+cmdline_parse_token_string_t cmd_tx_cksum_outer_proto =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_outer_result,
+				proto, "outer-ip");
+cmdline_parse_token_string_t cmd_tx_cksum_outer_hwsw =
+	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_outer_result,
+				hwsw, "hw#sw");
+cmdline_parse_token_num_t cmd_tx_cksum_outer_portid =
+	TOKEN_NUM_INITIALIZER(struct cmd_tx_cksum_outer_result,
+				port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tx_cksum_outer_set = {
+	.f = cmd_tx_cksum_outer_parsed,
+	.data = NULL,
+	.help_str = "enable/disable hardware calculation of outer L3"
+		" checksum for tunneling packet when using csum forward"
+		" engine:tx_cksum set outer-ip hw|sw <port>",
+	.tokens = {
+		(void *)&cmd_tx_cksum_outer_tx_cksum,
+		(void *)&cmd_tx_cksum_outer_mode,
+		(void *)&cmd_tx_cksum_outer_proto,
+		(void *)&cmd_tx_cksum_outer_hwsw,
+		(void *)&cmd_tx_cksum_outer_portid,
+		NULL,
+	},
+};
 
 /* *** ENABLE HARDWARE INSERTION OF CHECKSUM IN TX PACKETS *** */
 struct cmd_tx_cksum_result {
@@ -2899,8 +3088,6 @@  cmd_tx_cksum_parsed(void *parsed_result,
 			mask = TESTPMD_TX_OFFLOAD_TCP_CKSUM;
 		} else if (!strcmp(res->proto, "sctp")) {
 			mask = TESTPMD_TX_OFFLOAD_SCTP_CKSUM;
-		} else if (!strcmp(res->proto, "vxlan")) {
-			mask = TESTPMD_TX_OFFLOAD_VXLAN_CKSUM;
 		}
 
 		if (hw)
@@ -2918,8 +3105,6 @@  cmd_tx_cksum_parsed(void *parsed_result,
 		(ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ? "hw" : "sw");
 	printf("SCTP checksum offload is %s\n",
 		(ol_flags & TESTPMD_TX_OFFLOAD_SCTP_CKSUM) ? "hw" : "sw");
-	printf("VxLAN checksum offload is %s\n",
-		(ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) ? "hw" : "sw");
 
 	/* display warnings if configuration is not supported by the NIC */
 	rte_eth_dev_info_get(res->port_id, &dev_info);
@@ -2953,7 +3138,7 @@  cmdline_parse_token_string_t cmd_tx_cksum_mode =
 				mode, "set");
 cmdline_parse_token_string_t cmd_tx_cksum_proto =
 	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_result,
-				proto, "ip#tcp#udp#sctp#vxlan");
+				proto, "ip#tcp#udp#sctp");
 cmdline_parse_token_string_t cmd_tx_cksum_hwsw =
 	TOKEN_STRING_INITIALIZER(struct cmd_tx_cksum_result,
 				hwsw, "hw#sw");
@@ -2965,7 +3150,7 @@  cmdline_parse_inst_t cmd_tx_cksum_set = {
 	.f = cmd_tx_cksum_parsed,
 	.data = NULL,
 	.help_str = "enable/disable hardware calculation of L3/L4 checksum when "
-		"using csum forward engine: tx_cksum set ip|tcp|udp|sctp|vxlan hw|sw <port>",
+		"using csum forward engine: tx_cksum set ip|tcp|udp|sctp hw|sw <port>",
 	.tokens = {
 		(void *)&cmd_tx_cksum_tx_cksum,
 		(void *)&cmd_tx_cksum_mode,
@@ -8749,6 +8934,8 @@  cmdline_parse_ctx_t main_ctx[] = {
 	(cmdline_parse_inst_t *)&cmd_tx_vlan_reset,
 	(cmdline_parse_inst_t *)&cmd_tx_vlan_set_pvid,
 	(cmdline_parse_inst_t *)&cmd_tx_cksum_set,
+	(cmdline_parse_inst_t *)&cmd_tx_cksum_tunnel_set,
+	(cmdline_parse_inst_t *)&cmd_tx_cksum_outer_set,
 	(cmdline_parse_inst_t *)&cmd_tx_cksum_show,
 	(cmdline_parse_inst_t *)&cmd_tso_set,
 	(cmdline_parse_inst_t *)&cmd_tso_show,
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 41711fd..fa07b1f 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -256,17 +256,16 @@  process_outer_cksums(void *outer_l3_hdr, uint16_t outer_ethertype,
 	struct udp_hdr *udp_hdr;
 	uint64_t ol_flags = 0;
 
-	if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
-		ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
-
 	if (outer_ethertype == _htons(ETHER_TYPE_IPv4)) {
 		ipv4_hdr->hdr_checksum = 0;
 
-		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
+		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)
 			ol_flags |= PKT_TX_OUTER_IP_CKSUM;
-		else
+		else {
 			ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
-	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM)
+			ol_flags |= PKT_TX_OUTER_IPV4;
+		}
+	} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
 		ol_flags |= PKT_TX_OUTER_IPV6;
 
 	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + outer_l3_len);
@@ -300,11 +299,14 @@  process_outer_cksums(void *outer_l3_hdr, uint16_t outer_ethertype,
  *   Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 /
  *           UDP|TCP|SCTP
  *
- * The testpmd command line for this forward engine sets the flags
+ * These testpmd command lines for this forward engine sets the flags
  * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control
- * wether a checksum must be calculated in software or in hardware. The
- * IP, UDP, TCP and SCTP flags always concern the inner layer.  The
- * VxLAN flag concerns the outer IP (if packet is recognized as a vxlan packet).
+ * wether a checksum must be calculated in software or in hardware.
+ * In the case of tunneling packet, the IP, UDP, TCP and SCTP flags
+ * always concern the inner layer; the outer IP flag always concern
+ * the outer layer; the tunnel flag is used to tell the NIC that it
+ * is a tunneing packet, want hardware offload for outer layer,
+ * or inner layer, or both.
  */
 static void
 pkt_burst_checksum_forward(struct fwd_stream *fs)
@@ -376,7 +378,9 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 		l3_hdr = (char *)eth_hdr + l2_len;
 
 		/* check if it's a supported tunnel (only vxlan for now) */
-		if (l4_proto == IPPROTO_UDP) {
+		if (((testpmd_ol_flags &
+			TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM) == 0)
+			&& (l4_proto == IPPROTO_UDP)) {
 			udp_hdr = (struct udp_hdr *)((char *)l3_hdr + l3_len);
 
 			/* check udp destination port, 4789 is the default
@@ -386,17 +390,23 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 				tunnel = 1;
 
 			/* currently, this flag is set by i40e only if the
-			 * packet is vxlan */
+			 * packet is a tunneling packet */
 			} else if (m->ol_flags & (PKT_RX_TUNNEL_IPV4_HDR |
 					PKT_RX_TUNNEL_IPV6_HDR))
 				tunnel = 1;
 
 			if (tunnel == 1) {
+
+				if (testpmd_ol_flags
+					& TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM)
+					ol_flags |= PKT_TX_UDP_TUNNEL_PKT;
+
 				outer_ethertype = ethertype;
 				outer_l2_len = l2_len;
 				outer_l3_len = l3_len;
 				outer_l3_hdr = l3_hdr;
 
+				/* currently, only VXLAN packet is supported */
 				eth_hdr = (struct ether_hdr *)((char *)udp_hdr +
 					sizeof(struct udp_hdr) +
 					sizeof(struct vxlan_hdr));
@@ -434,7 +444,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 		/* step 4: fill the mbuf meta data (flags and header lengths) */
 
 		if (tunnel == 1) {
-			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM) {
+			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM) {
 				m->outer_l2_len = outer_l2_len;
 				m->outer_l3_len = outer_l3_len;
 				m->l2_len = l4_tun_len + l2_len;
@@ -505,7 +515,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 					"m->l4_len=%d\n",
 					m->l2_len, m->l3_len, m->l4_len);
 			if ((tunnel == 1) &&
-				(testpmd_ol_flags & TESTPMD_TX_OFFLOAD_VXLAN_CKSUM))
+				(testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM))
 				printf("tx: m->outer_l2_len=%d m->outer_l3_len=%d\n",
 					m->outer_l2_len, m->outer_l3_len);
 			if (tso_segsz != 0)
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index f8b0740..09caa6a 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -125,10 +125,20 @@  struct fwd_stream {
 #define TESTPMD_TX_OFFLOAD_TCP_CKSUM         0x0004
 /** Offload SCTP checksum in csum forward engine */
 #define TESTPMD_TX_OFFLOAD_SCTP_CKSUM        0x0008
-/** Offload VxLAN checksum in csum forward engine */
-#define TESTPMD_TX_OFFLOAD_VXLAN_CKSUM       0x0010
+/** Offload tunneling packet checksum in csum forward engine */
+#define TESTPMD_TX_OFFLOAD_TUNNEL_CKSUM      0x0010
 /** Insert VLAN header in forward engine */
 #define TESTPMD_TX_OFFLOAD_INSERT_VLAN       0x0020
+/**
+ * Offload outer-IP checksum in csum forward engine
+ * for tunneling packet
+ */
+#define TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM    0x0040
+/**
+ * For a tunneling packet, user requests HW offload for its outer
+ * layer checksum, and don't care is it a tunneled packet or not.
+ */
+#define TESTPMD_TX_OFFLOAD_NON_TUNNEL_CKSUM  0x0080
 
 /**
  * The data structure associated with each port.