[v2] app/testpmd: fix TX checksum calculation for tunnel

Message ID 20210727130757.30724-1-getelson@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Andrew Rybchenko
Headers
Series [v2] app/testpmd: fix TX checksum calculation for tunnel |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot success github build: passed
ci/iol-abi-testing success Testing PASS
ci/iol-testing fail Testing issues
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Etelson, Gregory July 27, 2021, 1:07 p.m. UTC
TX checksum of a tunnelled packet can be calculated for outer headers
only or for both outer and inner parts. The calculation method is
determined by application.
If TX checksum calculation can be offloaded, hardware ignores
existing checksum value and replaces it with an updated result.
If TX checksum is calculated by a software, existing value must be
zeroed first.
The testpmd checksum forwarding engine always zeroed inner checksums.
If inner checksum calculation was offloaded, that header was left
with 0 checksum value.
Following outer software checksum calculation produced wrong value.
The patch zeroes inner IPv4 checksum only before software calculation.

Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
Cc: stable@dpdk.org

Signed-off-by: Gregory Etelson <getelson@nvidia.com>
---
v2: 
 remove blank line between Fixes and Cc
 explicitly compare with 0 value in `if ()` 
---
 app/test-pmd/csumonly.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)
  

Comments

Li, Xiaoyun July 28, 2021, 1:31 a.m. UTC | #1
Hi

> -----Original Message-----
> From: Gregory Etelson <getelson@nvidia.com>
> Sent: Tuesday, July 27, 2021 21:08
> To: dev@dpdk.org
> Cc: getelson@nvidia.com; Ajit Khaparde <ajit.khaparde@broadcom.com>;
> Olivier Matz <olivier.matz@6wind.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Thomas Monjalon <thomas@monjalon.net>; stable@dpdk.org; Li, Xiaoyun
> <xiaoyun.li@intel.com>
> Subject: [PATCH v2] app/testpmd: fix TX checksum calculation for tunnel
> 
> TX checksum of a tunnelled packet can be calculated for outer headers only or
> for both outer and inner parts. The calculation method is determined by
> application.
> If TX checksum calculation can be offloaded, hardware ignores existing
> checksum value and replaces it with an updated result.
> If TX checksum is calculated by a software, existing value must be zeroed first.
> The testpmd checksum forwarding engine always zeroed inner checksums.
> If inner checksum calculation was offloaded, that header was left with 0
> checksum value.
> Following outer software checksum calculation produced wrong value.
> The patch zeroes inner IPv4 checksum only before software calculation.
> 
> Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> ---
> v2:
>  remove blank line between Fixes and Cc
>  explicitly compare with 0 value in `if ()`
> ---
>  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> 0161f72175..bd5ad64a57 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
> 
>  	if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
>  		ipv4_hdr = l3_hdr;
> -		ipv4_hdr->hdr_checksum = 0;
> 
>  		ol_flags |= PKT_TX_IPV4;
>  		if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
>  			ol_flags |= PKT_TX_IP_CKSUM;
>  		} else {
> -			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> +			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
>  				ol_flags |= PKT_TX_IP_CKSUM;
> -			else
> +			} else if (ipv4_hdr->hdr_checksum != 0) {
> +				ipv4_hdr->hdr_checksum = 0;
>  				ipv4_hdr->hdr_checksum =
>  					rte_ipv4_cksum(ipv4_hdr);
> +			}
>  		}
>  	} else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
>  		ol_flags |= PKT_TX_IPV6;
> @@ -501,10 +502,10 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>  		udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
>  		/* do not recalculate udp cksum if it was 0 */
>  		if (udp_hdr->dgram_cksum != 0) {
> -			udp_hdr->dgram_cksum = 0;
> -			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> +			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
>  				ol_flags |= PKT_TX_UDP_CKSUM;
> -			else {
> +			} else {

else if (udp_hdr->dgram_cksum != 0) { ?

> +				udp_hdr->dgram_cksum = 0;
>  				udp_hdr->dgram_cksum =
>  					get_udptcp_checksum(l3_hdr, udp_hdr,
>  						info->ethertype);
> @@ -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>  			ol_flags |= PKT_TX_UDP_SEG;
>  	} else if (info->l4_proto == IPPROTO_TCP) {
>  		tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
> -		tcp_hdr->cksum = 0;
>  		if (tso_segsz)
>  			ol_flags |= PKT_TX_TCP_SEG;
> -		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> +		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
>  			ol_flags |= PKT_TX_TCP_CKSUM;
> -		else {
> +		} else if (tcp_hdr->cksum != 0) {
> +			tcp_hdr->cksum = 0;
>  			tcp_hdr->cksum =
>  				get_udptcp_checksum(l3_hdr, tcp_hdr,
>  					info->ethertype);
> @@ -529,13 +530,13 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>  	} else if (info->l4_proto == IPPROTO_SCTP) {
>  		sctp_hdr = (struct rte_sctp_hdr *)
>  			((char *)l3_hdr + info->l3_len);
> -		sctp_hdr->cksum = 0;
>  		/* sctp payload must be a multiple of 4 to be
>  		 * offloaded */
>  		if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
>  			((ipv4_hdr->total_length & 0x3) == 0)) {
>  			ol_flags |= PKT_TX_SCTP_CKSUM;
> -		} else {
> +		} else if (sctp_hdr->cksum != 0) {
> +			sctp_hdr->cksum = 0;
>  			/* XXX implement CRC32c, example available in
>  			 * RFC3309 */
>  		}
> --
> 2.32.0
  
Etelson, Gregory July 28, 2021, 3:45 a.m. UTC | #2
Hello,

Please see below.

Regards,
Gregory

> > Subject: [PATCH v2] app/testpmd: fix TX checksum calculation for
> > tunnel
> >
> > TX checksum of a tunnelled packet can be calculated for outer headers
> > only or for both outer and inner parts. The calculation method is
> > determined by application.
> > If TX checksum calculation can be offloaded, hardware ignores existing
> > checksum value and replaces it with an updated result.
> > If TX checksum is calculated by a software, existing value must be zeroed
> first.
> > The testpmd checksum forwarding engine always zeroed inner checksums.
> > If inner checksum calculation was offloaded, that header was left with
> > 0 checksum value.
> > Following outer software checksum calculation produced wrong value.
> > The patch zeroes inner IPv4 checksum only before software calculation.
> >
> > Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> > ---
> > v2:
> >  remove blank line between Fixes and Cc  explicitly compare with 0
> > value in `if ()`
> > ---
> >  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
> >  1 file changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> > 0161f72175..bd5ad64a57 100644
> > --- a/app/test-pmd/csumonly.c
> > +++ b/app/test-pmd/csumonly.c
> > @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> >
> >       if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
> >               ipv4_hdr = l3_hdr;
> > -             ipv4_hdr->hdr_checksum = 0;
> >
> >               ol_flags |= PKT_TX_IPV4;
> >               if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
> >                       ol_flags |= PKT_TX_IP_CKSUM;
> >               } else {
> > -                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> > +                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
> >                               ol_flags |= PKT_TX_IP_CKSUM;
> > -                     else
> > +                     } else if (ipv4_hdr->hdr_checksum != 0) {
> > +                             ipv4_hdr->hdr_checksum = 0;
> >                               ipv4_hdr->hdr_checksum =
> >                                       rte_ipv4_cksum(ipv4_hdr);
> > +                     }
> >               }
> >       } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
> >               ol_flags |= PKT_TX_IPV6; @@ -501,10 +502,10 @@
> > process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info
> > *info,
> >               udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
> >               /* do not recalculate udp cksum if it was 0 */
> >               if (udp_hdr->dgram_cksum != 0) {
> > -                     udp_hdr->dgram_cksum = 0;
> > -                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> > +                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
> >                               ol_flags |= PKT_TX_UDP_CKSUM;
> > -                     else {
> > +                     } else {
> 
> else if (udp_hdr->dgram_cksum != 0) { ?
>

process_inner_cksums() function verifies udp_hdr->dgram_cksum != 0
before checking for hardware / software offload capabilities in enclosing 
if() statement. No need to repeat that verification
 
> > +                             udp_hdr->dgram_cksum = 0;
> >                               udp_hdr->dgram_cksum =
> >                                       get_udptcp_checksum(l3_hdr, udp_hdr,
> >                                               info->ethertype); @@
> > -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> >                       ol_flags |= PKT_TX_UDP_SEG;
> >       } else if (info->l4_proto == IPPROTO_TCP) {
> >               tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
> > -             tcp_hdr->cksum = 0;
> >               if (tso_segsz)
> >                       ol_flags |= PKT_TX_TCP_SEG;
> > -             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> > +             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
> >                       ol_flags |= PKT_TX_TCP_CKSUM;
> > -             else {
> > +             } else if (tcp_hdr->cksum != 0) {
> > +                     tcp_hdr->cksum = 0;
> >                       tcp_hdr->cksum =
> >                               get_udptcp_checksum(l3_hdr, tcp_hdr,
> >                                       info->ethertype); @@ -529,13
> > +530,13 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> >       } else if (info->l4_proto == IPPROTO_SCTP) {
> >               sctp_hdr = (struct rte_sctp_hdr *)
> >                       ((char *)l3_hdr + info->l3_len);
> > -             sctp_hdr->cksum = 0;
> >               /* sctp payload must be a multiple of 4 to be
> >                * offloaded */
> >               if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
> >                       ((ipv4_hdr->total_length & 0x3) == 0)) {
> >                       ol_flags |= PKT_TX_SCTP_CKSUM;
> > -             } else {
> > +             } else if (sctp_hdr->cksum != 0) {
> > +                     sctp_hdr->cksum = 0;
> >                       /* XXX implement CRC32c, example available in
> >                        * RFC3309 */
> >               }
> > --
> > 2.32.0
  
Ajit Khaparde July 28, 2021, 4:09 a.m. UTC | #3
On Tue, Jul 27, 2021 at 6:08 AM Gregory Etelson <getelson@nvidia.com> wrote:
>
> TX checksum of a tunnelled packet can be calculated for outer headers
> only or for both outer and inner parts. The calculation method is
> determined by application.
> If TX checksum calculation can be offloaded, hardware ignores
> existing checksum value and replaces it with an updated result.
> If TX checksum is calculated by a software, existing value must be
> zeroed first.
> The testpmd checksum forwarding engine always zeroed inner checksums.
> If inner checksum calculation was offloaded, that header was left
> with 0 checksum value.
> Following outer software checksum calculation produced wrong value.
> The patch zeroes inner IPv4 checksum only before software calculation.
>
> Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> Cc: stable@dpdk.org
>
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Acked-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
  
Li, Xiaoyun July 28, 2021, 5:07 a.m. UTC | #4
> -----Original Message-----
> From: Gregory Etelson <getelson@nvidia.com>
> Sent: Tuesday, July 27, 2021 21:08
> To: dev@dpdk.org
> Cc: getelson@nvidia.com; Ajit Khaparde <ajit.khaparde@broadcom.com>;
> Olivier Matz <olivier.matz@6wind.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Thomas Monjalon <thomas@monjalon.net>; stable@dpdk.org; Li, Xiaoyun
> <xiaoyun.li@intel.com>
> Subject: [PATCH v2] app/testpmd: fix TX checksum calculation for tunnel
> 
> TX checksum of a tunnelled packet can be calculated for outer headers only or
> for both outer and inner parts. The calculation method is determined by
> application.
> If TX checksum calculation can be offloaded, hardware ignores existing
> checksum value and replaces it with an updated result.
> If TX checksum is calculated by a software, existing value must be zeroed first.
> The testpmd checksum forwarding engine always zeroed inner checksums.
> If inner checksum calculation was offloaded, that header was left with 0
> checksum value.
> Following outer software checksum calculation produced wrong value.
> The patch zeroes inner IPv4 checksum only before software calculation.
> 
> Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> ---
> v2:
>  remove blank line between Fixes and Cc
>  explicitly compare with 0 value in `if ()`
> ---
>  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
  
Olivier Matz July 28, 2021, 2:12 p.m. UTC | #5
Hi Gregory,

Few comments below.

On Tue, Jul 27, 2021 at 04:07:57PM +0300, Gregory Etelson wrote:
> TX checksum of a tunnelled packet can be calculated for outer headers
> only or for both outer and inner parts. The calculation method is
> determined by application.
> If TX checksum calculation can be offloaded, hardware ignores
> existing checksum value and replaces it with an updated result.

This is not always true. Actually, the checksum value is optionally
set by software to the value that is expected by the hardware to offload
the checksum correctly. This is done through rte_eth_tx_prepare(), which
is called in csumonly test engine.

For instance, on an ixgbe NIC, it does:

  rte_eth_tx_prepare()
    eth_dev->tx_pkt_prepare()
      ixgbe_prep_pkts()
        rte_net_intel_cksum_flags_prepare()
          if packet is IP, set IP checksum to 0
          if packet is TCP or UDP, set L4 checksum to the phdr csum

This driver-specific rte_eth_tx_prepare() can indeed do nothing and let
the hardware ignore the checksum in the packet.

> If TX checksum is calculated by a software, existing value must be
> zeroed first.
> The testpmd checksum forwarding engine always zeroed inner checksums.
> If inner checksum calculation was offloaded, that header was left
> with 0 checksum value.
> Following outer software checksum calculation produced wrong value.
> The patch zeroes inner IPv4 checksum only before software calculation.

Sorry, I think I don't understand the issue. Are you trying to compute
the inner checksum by hardware and the outer checksum by software?

> Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")

I'm not sure the problem origin is this commit (however, I may have
misunderstood your issue).

At the time this commit was done, it was required to set the TCP/UDP
checksum to the pseudo header checksum to offload an L4 checksum. See:
https://git.dpdk.org/dpdk/tree/lib/librte_mbuf/rte_mbuf.h?id=51f694dd40f5#n107

The introduction of rte_eth_tx_prepare() API removed this need, see:
https://git.dpdk.org/dpdk/commit/?id=6b520d54ebfe

Thanks,
Olivier

> Cc: stable@dpdk.org
> 
> Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> ---
> v2: 
>  remove blank line between Fixes and Cc
>  explicitly compare with 0 value in `if ()` 
> ---
>  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> index 0161f72175..bd5ad64a57 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
>  
>  	if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
>  		ipv4_hdr = l3_hdr;
> -		ipv4_hdr->hdr_checksum = 0;
>  
>  		ol_flags |= PKT_TX_IPV4;
>  		if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
>  			ol_flags |= PKT_TX_IP_CKSUM;
>  		} else {
> -			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> +			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
>  				ol_flags |= PKT_TX_IP_CKSUM;
> -			else
> +			} else if (ipv4_hdr->hdr_checksum != 0) {
> +				ipv4_hdr->hdr_checksum = 0;
>  				ipv4_hdr->hdr_checksum =
>  					rte_ipv4_cksum(ipv4_hdr);
> +			}
>  		}
>  	} else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
>  		ol_flags |= PKT_TX_IPV6;
> @@ -501,10 +502,10 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
>  		udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
>  		/* do not recalculate udp cksum if it was 0 */
>  		if (udp_hdr->dgram_cksum != 0) {
> -			udp_hdr->dgram_cksum = 0;
> -			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> +			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
>  				ol_flags |= PKT_TX_UDP_CKSUM;
> -			else {
> +			} else {
> +				udp_hdr->dgram_cksum = 0;
>  				udp_hdr->dgram_cksum =
>  					get_udptcp_checksum(l3_hdr, udp_hdr,
>  						info->ethertype);
> @@ -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
>  			ol_flags |= PKT_TX_UDP_SEG;
>  	} else if (info->l4_proto == IPPROTO_TCP) {
>  		tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
> -		tcp_hdr->cksum = 0;
>  		if (tso_segsz)
>  			ol_flags |= PKT_TX_TCP_SEG;
> -		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> +		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
>  			ol_flags |= PKT_TX_TCP_CKSUM;
> -		else {
> +		} else if (tcp_hdr->cksum != 0) {
> +			tcp_hdr->cksum = 0;
>  			tcp_hdr->cksum =
>  				get_udptcp_checksum(l3_hdr, tcp_hdr,
>  					info->ethertype);
> @@ -529,13 +530,13 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
>  	} else if (info->l4_proto == IPPROTO_SCTP) {
>  		sctp_hdr = (struct rte_sctp_hdr *)
>  			((char *)l3_hdr + info->l3_len);
> -		sctp_hdr->cksum = 0;
>  		/* sctp payload must be a multiple of 4 to be
>  		 * offloaded */
>  		if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
>  			((ipv4_hdr->total_length & 0x3) == 0)) {
>  			ol_flags |= PKT_TX_SCTP_CKSUM;
> -		} else {
> +		} else if (sctp_hdr->cksum != 0) {
> +			sctp_hdr->cksum = 0;
>  			/* XXX implement CRC32c, example available in
>  			 * RFC3309 */
>  		}
> -- 
> 2.32.0
>
  
Etelson, Gregory July 28, 2021, 4:07 p.m. UTC | #6
Hello Oliver,

Please see my comments below

> On Tue, Jul 27, 2021 at 04:07:57PM +0300, Gregory Etelson wrote:
> > TX checksum of a tunnelled packet can be calculated for outer headers
> > only or for both outer and inner parts. The calculation method is
> > determined by application.
> > If TX checksum calculation can be offloaded, hardware ignores existing
> > checksum value and replaces it with an updated result.
> 
> This is not always true. Actually, the checksum value is optionally set by
> software to the value that is expected by the hardware to offload the
> checksum correctly. This is done through rte_eth_tx_prepare(), which is called
> in csumonly test engine.
> 
> For instance, on an ixgbe NIC, it does:
> 
>   rte_eth_tx_prepare()
>     eth_dev->tx_pkt_prepare()
>       ixgbe_prep_pkts()
>         rte_net_intel_cksum_flags_prepare()
>           if packet is IP, set IP checksum to 0
>           if packet is TCP or UDP, set L4 checksum to the phdr csum
> 
> This driver-specific rte_eth_tx_prepare() can indeed do nothing and let the
> hardware ignore the checksum in the packet.
>

You are right. I'll update the patch comment in v3.
 
> > If TX checksum is calculated by a software, existing value must be
> > zeroed first.
> > The testpmd checksum forwarding engine always zeroed inner checksums.
> > If inner checksum calculation was offloaded, that header was left with
> > 0 checksum value.
> > Following outer software checksum calculation produced wrong value.
> > The patch zeroes inner IPv4 checksum only before software calculation.
> 
> Sorry, I think I don't understand the issue. Are you trying to compute the inner
> checksum by hardware and the outer checksum by software?
> 

Correct. Inner checksum is offloaded and outer computed in software.
 
Consider this example:
Tunneled packet arrived at port A and being forwarded through port B.
The packet arrived at port A with correct inner checksums - L3 and L4.
Port B TX offloads inner L3 only.

process_inner_cksums() sets "ipv4_hdr->hdr_checksum = 0;" unconditionally.
Inner L3 checksum value will be restored by port B TX checksum offload, but when 
process_outer_cksums() runs software calculation on outer L4 it will use 0 and produce wrong result.

Therefore, the patch zeros inner checksum values only before actual software calculations.

> > Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> 
> I'm not sure the problem origin is this commit (however, I may have
> misunderstood your issue).
> 
> At the time this commit was done, it was required to set the TCP/UDP
> checksum to the pseudo header checksum to offload an L4 checksum. See:
> https://git.dpdk.org/dpdk/tree/lib/librte_mbuf/rte_mbuf.h?id=51f694dd40f5
> #n107
> 
> The introduction of rte_eth_tx_prepare() API removed this need, see:
> https://git.dpdk.org/dpdk/commit/?id=6b520d54ebfe
> 
> Thanks,
> Olivier
> 
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> > ---
> > v2:
> >  remove blank line between Fixes and Cc  explicitly compare with 0
> > value in `if ()`
> > ---
> >  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
> >  1 file changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> > 0161f72175..bd5ad64a57 100644
> > --- a/app/test-pmd/csumonly.c
> > +++ b/app/test-pmd/csumonly.c
> > @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> >
> >       if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
> >               ipv4_hdr = l3_hdr;
> > -             ipv4_hdr->hdr_checksum = 0;
> >
> >               ol_flags |= PKT_TX_IPV4;
> >               if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
> >                       ol_flags |= PKT_TX_IP_CKSUM;
> >               } else {
> > -                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> > +                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
> >                               ol_flags |= PKT_TX_IP_CKSUM;
> > -                     else
> > +                     } else if (ipv4_hdr->hdr_checksum != 0) {
> > +                             ipv4_hdr->hdr_checksum = 0;
> >                               ipv4_hdr->hdr_checksum =
> >                                       rte_ipv4_cksum(ipv4_hdr);
> > +                     }
> >               }
> >       } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
> >               ol_flags |= PKT_TX_IPV6; @@ -501,10 +502,10 @@
> > process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info
> *info,
> >               udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
> >               /* do not recalculate udp cksum if it was 0 */
> >               if (udp_hdr->dgram_cksum != 0) {
> > -                     udp_hdr->dgram_cksum = 0;
> > -                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> > +                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
> >                               ol_flags |= PKT_TX_UDP_CKSUM;
> > -                     else {
> > +                     } else {
> > +                             udp_hdr->dgram_cksum = 0;
> >                               udp_hdr->dgram_cksum =
> >                                       get_udptcp_checksum(l3_hdr, udp_hdr,
> >                                               info->ethertype); @@
> > -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
> >                       ol_flags |= PKT_TX_UDP_SEG;
> >       } else if (info->l4_proto == IPPROTO_TCP) {
> >               tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
> > -             tcp_hdr->cksum = 0;
> >               if (tso_segsz)
> >                       ol_flags |= PKT_TX_TCP_SEG;
> > -             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> > +             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
> >                       ol_flags |= PKT_TX_TCP_CKSUM;
> > -             else {
> > +             } else if (tcp_hdr->cksum != 0) {
> > +                     tcp_hdr->cksum = 0;
> >                       tcp_hdr->cksum =
> >                               get_udptcp_checksum(l3_hdr, tcp_hdr,
> >                                       info->ethertype); @@ -529,13
> > +530,13 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
> >       } else if (info->l4_proto == IPPROTO_SCTP) {
> >               sctp_hdr = (struct rte_sctp_hdr *)
> >                       ((char *)l3_hdr + info->l3_len);
> > -             sctp_hdr->cksum = 0;
> >               /* sctp payload must be a multiple of 4 to be
> >                * offloaded */
> >               if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
> >                       ((ipv4_hdr->total_length & 0x3) == 0)) {
> >                       ol_flags |= PKT_TX_SCTP_CKSUM;
> > -             } else {
> > +             } else if (sctp_hdr->cksum != 0) {
> > +                     sctp_hdr->cksum = 0;
> >                       /* XXX implement CRC32c, example available in
> >                        * RFC3309 */
> >               }
> > --
> > 2.32.0
> >
  
Olivier Matz July 29, 2021, 8:25 a.m. UTC | #7
On Wed, Jul 28, 2021 at 04:07:51PM +0000, Gregory Etelson wrote:
> Hello Oliver,
> 
> Please see my comments below
> 
> > On Tue, Jul 27, 2021 at 04:07:57PM +0300, Gregory Etelson wrote:
> > > TX checksum of a tunnelled packet can be calculated for outer headers
> > > only or for both outer and inner parts. The calculation method is
> > > determined by application.
> > > If TX checksum calculation can be offloaded, hardware ignores existing
> > > checksum value and replaces it with an updated result.
> > 
> > This is not always true. Actually, the checksum value is optionally set by
> > software to the value that is expected by the hardware to offload the
> > checksum correctly. This is done through rte_eth_tx_prepare(), which is called
> > in csumonly test engine.
> > 
> > For instance, on an ixgbe NIC, it does:
> > 
> >   rte_eth_tx_prepare()
> >     eth_dev->tx_pkt_prepare()
> >       ixgbe_prep_pkts()
> >         rte_net_intel_cksum_flags_prepare()
> >           if packet is IP, set IP checksum to 0
> >           if packet is TCP or UDP, set L4 checksum to the phdr csum
> > 
> > This driver-specific rte_eth_tx_prepare() can indeed do nothing and let the
> > hardware ignore the checksum in the packet.
> >
> 
> You are right. I'll update the patch comment in v3.
>  
> > > If TX checksum is calculated by a software, existing value must be
> > > zeroed first.
> > > The testpmd checksum forwarding engine always zeroed inner checksums.
> > > If inner checksum calculation was offloaded, that header was left with
> > > 0 checksum value.
> > > Following outer software checksum calculation produced wrong value.
> > > The patch zeroes inner IPv4 checksum only before software calculation.
> > 
> > Sorry, I think I don't understand the issue. Are you trying to compute the inner
> > checksum by hardware and the outer checksum by software?
> > 
> 
> Correct. Inner checksum is offloaded and outer computed in software.

I think this approach is not sane: the value of the outer checksum depends
on the inner checksum, so it has to be calculated after. There is a comment
in the code about this:

	/* Then process outer headers if any. Note that the software
	 * checksum will be wrong if one of the inner checksums is
	 * processed in hardware. */
	if (info.is_tunnel == 1) {
		tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
				tx_offloads,
				!!(tx_ol_flags & PKT_TX_TCP_SEG));
	}


> Consider this example:
> Tunneled packet arrived at port A and being forwarded through port B.
> The packet arrived at port A with correct inner checksums - L3 and L4.
> Port B TX offloads inner L3 only.
> 
> process_inner_cksums() sets "ipv4_hdr->hdr_checksum = 0;" unconditionally.
> Inner L3 checksum value will be restored by port B TX checksum offload, but when 
> process_outer_cksums() runs software calculation on outer L4 it will use 0 and produce wrong result.
> 
> Therefore, the patch zeros inner checksum values only before actual software calculations.

I better understand your use case, thanks.

However, with your patch, if the inner L4 checksum is wrong when it
arrives on port A, I think it will result in a packet with a wrong outer
L4 checksum and a correct inner L4 checksum. Is it what you expect?

I don't argue against the patch itself. What you suggest better matches
the offload API than what we have today. Can you please send another
version that better explains the use-case?

One more suggestion, maybe for later. Currently, the csumonly engine can
be configured to do the checksum in sw or in hw. Maybe we could add a
"dont-touch" option, to keep the value in the packet. Would it help for
your use-case?

> 
> > > Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> > 
> > I'm not sure the problem origin is this commit (however, I may have
> > misunderstood your issue).
> > 
> > At the time this commit was done, it was required to set the TCP/UDP
> > checksum to the pseudo header checksum to offload an L4 checksum. See:
> > https://git.dpdk.org/dpdk/tree/lib/librte_mbuf/rte_mbuf.h?id=51f694dd40f5
> > #n107
> > 
> > The introduction of rte_eth_tx_prepare() API removed this need, see:
> > https://git.dpdk.org/dpdk/commit/?id=6b520d54ebfe

Just a reminder for this one.

Thanks,
Olivier


> > Thanks,
> > Olivier
> > 
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Gregory Etelson <getelson@nvidia.com>
> > > ---
> > > v2:
> > >  remove blank line between Fixes and Cc  explicitly compare with 0
> > > value in `if ()`
> > > ---
> > >  app/test-pmd/csumonly.c | 23 ++++++++++++-----------
> > >  1 file changed, 12 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> > > 0161f72175..bd5ad64a57 100644
> > > --- a/app/test-pmd/csumonly.c
> > > +++ b/app/test-pmd/csumonly.c
> > > @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct
> > > testpmd_offload_info *info,
> > >
> > >       if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
> > >               ipv4_hdr = l3_hdr;
> > > -             ipv4_hdr->hdr_checksum = 0;
> > >
> > >               ol_flags |= PKT_TX_IPV4;
> > >               if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
> > >                       ol_flags |= PKT_TX_IP_CKSUM;
> > >               } else {
> > > -                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> > > +                     if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
> > >                               ol_flags |= PKT_TX_IP_CKSUM;
> > > -                     else
> > > +                     } else if (ipv4_hdr->hdr_checksum != 0) {
> > > +                             ipv4_hdr->hdr_checksum = 0;
> > >                               ipv4_hdr->hdr_checksum =
> > >                                       rte_ipv4_cksum(ipv4_hdr);
> > > +                     }
> > >               }
> > >       } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
> > >               ol_flags |= PKT_TX_IPV6; @@ -501,10 +502,10 @@
> > > process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info
> > *info,
> > >               udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
> > >               /* do not recalculate udp cksum if it was 0 */
> > >               if (udp_hdr->dgram_cksum != 0) {
> > > -                     udp_hdr->dgram_cksum = 0;
> > > -                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> > > +                     if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
> > >                               ol_flags |= PKT_TX_UDP_CKSUM;
> > > -                     else {
> > > +                     } else {
> > > +                             udp_hdr->dgram_cksum = 0;
> > >                               udp_hdr->dgram_cksum =
> > >                                       get_udptcp_checksum(l3_hdr, udp_hdr,
> > >                                               info->ethertype); @@
> > > -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> > >                       ol_flags |= PKT_TX_UDP_SEG;
> > >       } else if (info->l4_proto == IPPROTO_TCP) {
> > >               tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
> > > -             tcp_hdr->cksum = 0;
> > >               if (tso_segsz)
> > >                       ol_flags |= PKT_TX_TCP_SEG;
> > > -             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> > > +             else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
> > >                       ol_flags |= PKT_TX_TCP_CKSUM;
> > > -             else {
> > > +             } else if (tcp_hdr->cksum != 0) {
> > > +                     tcp_hdr->cksum = 0;
> > >                       tcp_hdr->cksum =
> > >                               get_udptcp_checksum(l3_hdr, tcp_hdr,
> > >                                       info->ethertype); @@ -529,13
> > > +530,13 @@ process_inner_cksums(void *l3_hdr, const struct
> > testpmd_offload_info *info,
> > >       } else if (info->l4_proto == IPPROTO_SCTP) {
> > >               sctp_hdr = (struct rte_sctp_hdr *)
> > >                       ((char *)l3_hdr + info->l3_len);
> > > -             sctp_hdr->cksum = 0;
> > >               /* sctp payload must be a multiple of 4 to be
> > >                * offloaded */
> > >               if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
> > >                       ((ipv4_hdr->total_length & 0x3) == 0)) {
> > >                       ol_flags |= PKT_TX_SCTP_CKSUM;
> > > -             } else {
> > > +             } else if (sctp_hdr->cksum != 0) {
> > > +                     sctp_hdr->cksum = 0;
> > >                       /* XXX implement CRC32c, example available in
> > >                        * RFC3309 */
> > >               }
> > > --
> > > 2.32.0
> > >
  
Etelson, Gregory July 29, 2021, 10:31 a.m. UTC | #8
Hello Olivier,

[:snip:]
> >
> > Correct. Inner checksum is offloaded and outer computed in software.
> 
> I think this approach is not sane: the value of the outer checksum depends on
> the inner checksum, so it has to be calculated after. There is a comment in the
> code about this:
> 
>         /* Then process outer headers if any. Note that the software
>          * checksum will be wrong if one of the inner checksums is
>          * processed in hardware. */
>         if (info.is_tunnel == 1) {
>                 tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
>                                 tx_offloads,
>                                 !!(tx_ol_flags & PKT_TX_TCP_SEG));
>         }

I agree. That computation order led to error in my case.
What if the engine could analyze port TX offload options and select 
processing order according to existing configuration ?
 
> > Consider this example:
> > Tunneled packet arrived at port A and being forwarded through port B.
> > The packet arrived at port A with correct inner checksums - L3 and L4.
> > Port B TX offloads inner L3 only.
> >
> > process_inner_cksums() sets "ipv4_hdr->hdr_checksum = 0;"
> unconditionally.
> > Inner L3 checksum value will be restored by port B TX checksum
> > offload, but when
> > process_outer_cksums() runs software calculation on outer L4 it will use 0
> and produce wrong result.
> >
> > Therefore, the patch zeros inner checksum values only before actual
> software calculations.
> 
> I better understand your use case, thanks.
> 
> However, with your patch, if the inner L4 checksum is wrong when it arrives
> on port A, I think it will result in a packet with a wrong outer
> L4 checksum and a correct inner L4 checksum. Is it what you expect?
> 

Wrong checksum reflects ongoing issue that must be investigated and fixed.
I don't expect forwarding engine to fix wrong checksum because it can hide
a real problem.

> I don't argue against the patch itself. What you suggest better matches the
> offload API than what we have today. Can you please send another version
> that better explains the use-case?
> 

I posted v3 with updated comment.

> One more suggestion, maybe for later. Currently, the csumonly engine can be
> configured to do the checksum in sw or in hw. Maybe we could add a "dont-
> touch" option, to keep the value in the packet. Would it help for your use-
> case?
> 

That's a very good idea.
Packets can arrive with pre-calculated correct checksums. Keeping these values
can save processing time.

[:snip:]

Regards,
Gregory
  
Olivier Matz July 29, 2021, 4:02 p.m. UTC | #9
On Thu, Jul 29, 2021 at 10:31:45AM +0000, Gregory Etelson wrote:
> Hello Olivier,
> 
> [:snip:]
> > >
> > > Correct. Inner checksum is offloaded and outer computed in software.
> > 
> > I think this approach is not sane: the value of the outer checksum depends on
> > the inner checksum, so it has to be calculated after. There is a comment in the
> > code about this:
> > 
> >         /* Then process outer headers if any. Note that the software
> >          * checksum will be wrong if one of the inner checksums is
> >          * processed in hardware. */
> >         if (info.is_tunnel == 1) {
> >                 tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> >                                 tx_offloads,
> >                                 !!(tx_ol_flags & PKT_TX_TCP_SEG));
> >         }
> 
> I agree. That computation order led to error in my case.
> What if the engine could analyze port TX offload options and select 
> processing order according to existing configuration ?

I really think hardware inner checksum + software outer checksum is
broken by design, because outer checksum depends on inner checksum.

> > > Consider this example:
> > > Tunneled packet arrived at port A and being forwarded through port B.
> > > The packet arrived at port A with correct inner checksums - L3 and L4.
> > > Port B TX offloads inner L3 only.
> > >
> > > process_inner_cksums() sets "ipv4_hdr->hdr_checksum = 0;"
> > unconditionally.
> > > Inner L3 checksum value will be restored by port B TX checksum
> > > offload, but when
> > > process_outer_cksums() runs software calculation on outer L4 it will use 0
> > and produce wrong result.
> > >
> > > Therefore, the patch zeros inner checksum values only before actual
> > software calculations.
> > 
> > I better understand your use case, thanks.
> > 
> > However, with your patch, if the inner L4 checksum is wrong when it arrives
> > on port A, I think it will result in a packet with a wrong outer
> > L4 checksum and a correct inner L4 checksum. Is it what you expect?
> > 
> 
> Wrong checksum reflects ongoing issue that must be investigated and fixed.
> I don't expect forwarding engine to fix wrong checksum because it can hide
> a real problem.

Yes and no :)

We should keep in mind that this engine is a demo / test program for
checksum API. A real-world application should detect and drop a packet
with a wrong checksum.

> > I don't argue against the patch itself. What you suggest better matches the
> > offload API than what we have today. Can you please send another version
> > that better explains the use-case?
> > 
> 
> I posted v3 with updated comment.
> 
> > One more suggestion, maybe for later. Currently, the csumonly engine can be
> > configured to do the checksum in sw or in hw. Maybe we could add a "dont-
> > touch" option, to keep the value in the packet. Would it help for your use-
> > case?
> > 
> 
> That's a very good idea.
> Packets can arrive with pre-calculated correct checksums. Keeping these values
> can save processing time.
> 
> [:snip:]
> 
> Regards,
> Gregory
  

Patch

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 0161f72175..bd5ad64a57 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -480,17 +480,18 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 
 	if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
 		ipv4_hdr = l3_hdr;
-		ipv4_hdr->hdr_checksum = 0;
 
 		ol_flags |= PKT_TX_IPV4;
 		if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
 			ol_flags |= PKT_TX_IP_CKSUM;
 		} else {
-			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
+			if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
 				ol_flags |= PKT_TX_IP_CKSUM;
-			else
+			} else if (ipv4_hdr->hdr_checksum != 0) {
+				ipv4_hdr->hdr_checksum = 0;
 				ipv4_hdr->hdr_checksum =
 					rte_ipv4_cksum(ipv4_hdr);
+			}
 		}
 	} else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
 		ol_flags |= PKT_TX_IPV6;
@@ -501,10 +502,10 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 		udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
 		/* do not recalculate udp cksum if it was 0 */
 		if (udp_hdr->dgram_cksum != 0) {
-			udp_hdr->dgram_cksum = 0;
-			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
+			if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
 				ol_flags |= PKT_TX_UDP_CKSUM;
-			else {
+			} else {
+				udp_hdr->dgram_cksum = 0;
 				udp_hdr->dgram_cksum =
 					get_udptcp_checksum(l3_hdr, udp_hdr,
 						info->ethertype);
@@ -514,12 +515,12 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 			ol_flags |= PKT_TX_UDP_SEG;
 	} else if (info->l4_proto == IPPROTO_TCP) {
 		tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
-		tcp_hdr->cksum = 0;
 		if (tso_segsz)
 			ol_flags |= PKT_TX_TCP_SEG;
-		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
+		else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
 			ol_flags |= PKT_TX_TCP_CKSUM;
-		else {
+		} else if (tcp_hdr->cksum != 0) {
+			tcp_hdr->cksum = 0;
 			tcp_hdr->cksum =
 				get_udptcp_checksum(l3_hdr, tcp_hdr,
 					info->ethertype);
@@ -529,13 +530,13 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
 	} else if (info->l4_proto == IPPROTO_SCTP) {
 		sctp_hdr = (struct rte_sctp_hdr *)
 			((char *)l3_hdr + info->l3_len);
-		sctp_hdr->cksum = 0;
 		/* sctp payload must be a multiple of 4 to be
 		 * offloaded */
 		if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
 			((ipv4_hdr->total_length & 0x3) == 0)) {
 			ol_flags |= PKT_TX_SCTP_CKSUM;
-		} else {
+		} else if (sctp_hdr->cksum != 0) {
+			sctp_hdr->cksum = 0;
 			/* XXX implement CRC32c, example available in
 			 * RFC3309 */
 		}