[3/4] net: introduce functions to verify L4 checksums

Message ID 20210427135755.927-4-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Andrew Rybchenko
Headers
Series net/tap: fix Rx cksum |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Olivier Matz April 27, 2021, 1:57 p.m. UTC
  Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
checksum 0"), the functions rte_ipv4_udptcp_cksum() and
rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
verify a packet containing a valid checksum.

Since these functions should be used to calculate the checksum to set in
a packet, introduce 2 new helpers for checksum verification. They return
0 if the checksum is valid in the packet.

Use this new helper in net/tap driver.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 drivers/net/tap/rte_eth_tap.c |   7 +-
 lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
 2 files changed, 104 insertions(+), 27 deletions(-)
  

Comments

Morten Brørup April 27, 2021, 3:02 p.m. UTC | #1
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, April 27, 2021 3:58 PM
> 
> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
> verify a packet containing a valid checksum.
> 
> Since these functions should be used to calculate the checksum to set
> in
> a packet, introduce 2 new helpers for checksum verification. They
> return
> 0 if the checksum is valid in the packet.
> 
> Use this new helper in net/tap driver.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/tap/rte_eth_tap.c |   7 +-
>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>  2 files changed, 104 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c
> b/drivers/net/tap/rte_eth_tap.c
> index 71282e8065..b14d5a1d55 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>  					return;
>  				}
>  			}
> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		}
> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>  		mbuf->ol_flags |= cksum_ok ?
>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>  	}
> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
> index 8c189009b0..ef84bcc5bf 100644
> --- a/lib/net/rte_ip.h
> +++ b/lib/net/rte_ip.h
> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, uint64_t ol_flags)
>  }
> 
>  /**
> - * Process the IPv4 UDP or TCP checksum.
> - *
> - * The IP and layer 4 checksum must be set to 0 in the packet by
> - * the caller.
> - *
> - * @param ipv4_hdr
> - *   The pointer to the contiguous IPv4 header.
> - * @param l4_hdr
> - *   The pointer to the beginning of the L4 header.
> - * @return
> - *   The complemented checksum to set in the IP packet.
> + * @internal Calculate the non-complemented IPv4 L4 checksum
>   */
>  static inline uint16_t
> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void
> *l4_hdr)
> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const
> void *l4_hdr)
>  {
>  	uint32_t cksum;
>  	uint32_t l3_len, l4_len;
> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, const void *l4_hdr)
>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
> 
>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
> +
> +	return (uint16_t)cksum;
> +}
> +
> +/**
> + * Process the IPv4 UDP or TCP checksum.
> + *
> + * The IP and layer 4 checksum must be set to 0 in the packet by
> + * the caller.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   The complemented checksum to set in the IP packet.
> + */
> +static inline uint16_t
> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void
> *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	cksum = ~cksum;
> +
>  	/*
> -	 * Per RFC 768:If the computed checksum is zero for UDP,
> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
>  	 * (the equivalent in one's complement arithmetic).
>  	 */
>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>  		cksum = 0xffff;
> 
> -	return (uint16_t)cksum;
> +	return cksum;
> +}

The GCC static branch predictor treats the above comparison as likely. Playing around with Godbolt, I came up with this alternative:

	if (likely(cksum != 0)) return cksum;
	if (ipv4_hdr->next_proto_id == IPPROTO_UDP) return 0xffff;
	return cksum;

> +
> +/**
> + * Validate the IPv4 UDP or TCP checksum.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	if (cksum != 0xffff)
> +		return -1;

The GCC static branch predictor treats the above comparison as likely, so I would prefer unlikely() around it.

> +
> +	return 0;
>  }
> 
>  /**
> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>  	return __rte_raw_cksum_reduce(sum);
>  }
> 
> +/**
> + * @internal Calculate the non-complemented IPv4 L4 checksum
> + */
> +static inline uint16_t
> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const
> void *l4_hdr)
> +{
> +	uint32_t cksum;
> +	uint32_t l4_len;
> +
> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +
> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +
> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> +
> +	return (uint16_t)cksum;
> +}
> +
>  /**
>   * Process the IPv6 UDP or TCP checksum.
>   *
> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>  static inline uint16_t
>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void
> *l4_hdr)
>  {
> -	uint32_t cksum;
> -	uint32_t l4_len;
> -
> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> 
> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +	cksum = ~cksum;
> 
> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
>  	/*
>  	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, const void *l4_hdr)
>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
>  		cksum = 0xffff;

Same comment about GCC static branch prediction as above.

> 
> -	return (uint16_t)cksum;
> +	return cksum;
> +}
> +
> +/**
> + * Validate the IPv6 UDP or TCP checksum.
> + *
> + * The function accepts a 0 checksum, since it can exceptionally
> happen. See 8.1
> + * (Upper-Layer Checksums) in RFC 8200.
> + *
> + * @param ipv6_hdr
> + *   The pointer to the contiguous IPv6 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum;
> +
> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> +	if (cksum != 0xffff)
> +		return -1;

Same comment about GCC static branch prediction as above.

> +
> +	return 0;
>  }
> 
>  /** IPv6 fragment extension header. */
> --
> 2.29.2
> 

With or without my suggested modifications:

Acked-by: Morten Brørup <mb@smartsharesystems.com>

Without my suggested modifications:

Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
  
Morten Brørup April 27, 2021, 3:07 p.m. UTC | #2
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, April 27, 2021 3:58 PM
> 
> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
> verify a packet containing a valid checksum.
> 
> Since these functions should be used to calculate the checksum to set
> in
> a packet, introduce 2 new helpers for checksum verification. They
> return
> 0 if the checksum is valid in the packet.
> 
> Use this new helper in net/tap driver.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/tap/rte_eth_tap.c |   7 +-
>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>  2 files changed, 104 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c
> b/drivers/net/tap/rte_eth_tap.c
> index 71282e8065..b14d5a1d55 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>  					return;
>  				}
>  			}
> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		}
> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>  		mbuf->ol_flags |= cksum_ok ?
>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>  	}
> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
> index 8c189009b0..ef84bcc5bf 100644
> --- a/lib/net/rte_ip.h
> +++ b/lib/net/rte_ip.h
> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, uint64_t ol_flags)
>  }
> 
>  /**
> - * Process the IPv4 UDP or TCP checksum.
> - *
> - * The IP and layer 4 checksum must be set to 0 in the packet by
> - * the caller.
> - *
> - * @param ipv4_hdr
> - *   The pointer to the contiguous IPv4 header.
> - * @param l4_hdr
> - *   The pointer to the beginning of the L4 header.
> - * @return
> - *   The complemented checksum to set in the IP packet.
> + * @internal Calculate the non-complemented IPv4 L4 checksum
>   */
>  static inline uint16_t
> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void
> *l4_hdr)
> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const
> void *l4_hdr)
>  {
>  	uint32_t cksum;
>  	uint32_t l3_len, l4_len;
> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr
> *ipv4_hdr, const void *l4_hdr)
>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
> 
>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
> +
> +	return (uint16_t)cksum;
> +}
> +
> +/**
> + * Process the IPv4 UDP or TCP checksum.
> + *
> + * The IP and layer 4 checksum must be set to 0 in the packet by
> + * the caller.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   The complemented checksum to set in the IP packet.
> + */
> +static inline uint16_t
> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void
> *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	cksum = ~cksum;
> +
>  	/*
> -	 * Per RFC 768:If the computed checksum is zero for UDP,
> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
>  	 * (the equivalent in one's complement arithmetic).
>  	 */
>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>  		cksum = 0xffff;
> 
> -	return (uint16_t)cksum;
> +	return cksum;
> +}

The GCC static branch predictor treats the above comparison as likely. Playing around with Godbolt, I came up with this alternative:

	if (likely(cksum != 0)) return cksum;
	if (ipv4_hdr->next_proto_id == IPPROTO_UDP) return 0xffff;
	return 0;

> +
> +/**
> + * Validate the IPv4 UDP or TCP checksum.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	if (cksum != 0xffff)
> +		return -1;

The GCC static branch predictor treats the above comparison as likely, so I would prefer unlikely() around it.

> +
> +	return 0;
>  }
> 
>  /**
> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>  	return __rte_raw_cksum_reduce(sum);
>  }
> 
> +/**
> + * @internal Calculate the non-complemented IPv4 L4 checksum
> + */
> +static inline uint16_t
> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const
> void *l4_hdr)
> +{
> +	uint32_t cksum;
> +	uint32_t l4_len;
> +
> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +
> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +
> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> +
> +	return (uint16_t)cksum;
> +}
> +
>  /**
>   * Process the IPv6 UDP or TCP checksum.
>   *
> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, uint64_t ol_flags)
>  static inline uint16_t
>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void
> *l4_hdr)
>  {
> -	uint32_t cksum;
> -	uint32_t l4_len;
> -
> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> 
> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +	cksum = ~cksum;
> 
> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
>  	/*
>  	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr
> *ipv6_hdr, const void *l4_hdr)
>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
>  		cksum = 0xffff;

Same comment about GCC static branch prediction as above.

> 
> -	return (uint16_t)cksum;
> +	return cksum;
> +}
> +
> +/**
> + * Validate the IPv6 UDP or TCP checksum.
> + *
> + * The function accepts a 0 checksum, since it can exceptionally
> happen. See 8.1
> + * (Upper-Layer Checksums) in RFC 8200.
> + *
> + * @param ipv6_hdr
> + *   The pointer to the contiguous IPv6 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum;
> +
> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> +	if (cksum != 0xffff)
> +		return -1;

Same comment about GCC static branch prediction as above.

> +
> +	return 0;
>  }
> 
>  /** IPv6 fragment extension header. */
> --
> 2.29.2
> 

With or without my suggested modifications:

Acked-by: Morten Brørup <mb@smartsharesystems.com>

Without my suggested modifications:

Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
  
Olivier Matz April 28, 2021, 12:21 p.m. UTC | #3
Hi Morten,

Thank you for the review.

<...>

On Tue, Apr 27, 2021 at 05:07:04PM +0200, Morten Brørup wrote:
> > +static inline uint16_t
> > +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void
> > *l4_hdr)
> > +{
> > +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> > +
> > +	cksum = ~cksum;
> > +
> >  	/*
> > -	 * Per RFC 768:If the computed checksum is zero for UDP,
> > +	 * Per RFC 768: If the computed checksum is zero for UDP,
> >  	 * it is transmitted as all ones
> >  	 * (the equivalent in one's complement arithmetic).
> >  	 */
> >  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
> >  		cksum = 0xffff;
> > 
> > -	return (uint16_t)cksum;
> > +	return cksum;
> > +}
> 
> The GCC static branch predictor treats the above comparison as likely. Playing around with Godbolt, I came up with this alternative:
> 
> 	if (likely(cksum != 0)) return cksum;
> 	if (ipv4_hdr->next_proto_id == IPPROTO_UDP) return 0xffff;
> 	return 0;

Good idea, this is indeed an unlikely branch.
However this code was already present before this patch,
so I suggest to add it as a specific optimization patch.

> > +
> > +/**
> > + * Validate the IPv4 UDP or TCP checksum.
> > + *
> > + * @param ipv4_hdr
> > + *   The pointer to the contiguous IPv4 header.
> > + * @param l4_hdr
> > + *   The pointer to the beginning of the L4 header.
> > + * @return
> > + *   Return 0 if the checksum is correct, else -1.
> > + */
> > +__rte_experimental
> > +static inline int
> > +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> > +			     const void *l4_hdr)
> > +{
> > +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> > +
> > +	if (cksum != 0xffff)
> > +		return -1;
> 
> The GCC static branch predictor treats the above comparison as likely, so I would prefer unlikely() around it.

For this one, I'm less convinced: should we decide here whether
the good or the bad checksum is more likely than the other?

Given it's a static inline function, wouldn't it be better to let
the application call it this way:
  if (likely(rte_ipv4_udptcp_cksum_verify(...) == 0))  ?


Regards,
Olivier
  
Morten Brørup April 28, 2021, 12:42 p.m. UTC | #4
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> Sent: Wednesday, April 28, 2021 2:22 PM
> 
> Hi Morten,
> 
> Thank you for the review.
> 
> <...>
> 
> On Tue, Apr 27, 2021 at 05:07:04PM +0200, Morten Brørup wrote:
> > > +static inline uint16_t
> > > +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const
> void
> > > *l4_hdr)
> > > +{
> > > +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> > > +
> > > +	cksum = ~cksum;
> > > +
> > >  	/*
> > > -	 * Per RFC 768:If the computed checksum is zero for UDP,
> > > +	 * Per RFC 768: If the computed checksum is zero for UDP,
> > >  	 * it is transmitted as all ones
> > >  	 * (the equivalent in one's complement arithmetic).
> > >  	 */
> > >  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
> > >  		cksum = 0xffff;
> > >
> > > -	return (uint16_t)cksum;
> > > +	return cksum;
> > > +}
> >
> > The GCC static branch predictor treats the above comparison as
> likely. Playing around with Godbolt, I came up with this alternative:
> >
> > 	if (likely(cksum != 0)) return cksum;
> > 	if (ipv4_hdr->next_proto_id == IPPROTO_UDP) return 0xffff;
> > 	return 0;
> 
> Good idea, this is indeed an unlikely branch.
> However this code was already present before this patch,
> so I suggest to add it as a specific optimization patch.

Please do.

> 
> > > +
> > > +/**
> > > + * Validate the IPv4 UDP or TCP checksum.
> > > + *
> > > + * @param ipv4_hdr
> > > + *   The pointer to the contiguous IPv4 header.
> > > + * @param l4_hdr
> > > + *   The pointer to the beginning of the L4 header.
> > > + * @return
> > > + *   Return 0 if the checksum is correct, else -1.
> > > + */
> > > +__rte_experimental
> > > +static inline int
> > > +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> > > +			     const void *l4_hdr)
> > > +{
> > > +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> > > +
> > > +	if (cksum != 0xffff)
> > > +		return -1;
> >
> > The GCC static branch predictor treats the above comparison as
> likely, so I would prefer unlikely() around it.
> 
> For this one, I'm less convinced: should we decide here whether
> the good or the bad checksum is more likely than the other?

You are right... this may be a question of personal preference - or application specific preference.

> 
> Given it's a static inline function, wouldn't it be better to let
> the application call it this way:
>   if (likely(rte_ipv4_udptcp_cksum_verify(...) == 0))  ?
> 

Good point. Double checking on Godbolt confirms the validity of your suggestion.

> 
> Regards,
> Olivier
  
Ferruh Yigit April 30, 2021, 3:42 p.m. UTC | #5
On 4/27/2021 2:57 PM, Olivier Matz wrote:
> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
> verify a packet containing a valid checksum.
> 
> Since these functions should be used to calculate the checksum to set in
> a packet, introduce 2 new helpers for checksum verification. They return
> 0 if the checksum is valid in the packet.
> 
> Use this new helper in net/tap driver.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/tap/rte_eth_tap.c |   7 +-
>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>  2 files changed, 104 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 71282e8065..b14d5a1d55 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>  					return;
>  				}
>  			}
> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
> +								 l4_hdr);
>  		}
> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>  		mbuf->ol_flags |= cksum_ok ?
>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>  	}
> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
> index 8c189009b0..ef84bcc5bf 100644
> --- a/lib/net/rte_ip.h
> +++ b/lib/net/rte_ip.h
> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
>  }
>  
>  /**
> - * Process the IPv4 UDP or TCP checksum.
> - *
> - * The IP and layer 4 checksum must be set to 0 in the packet by
> - * the caller.
> - *
> - * @param ipv4_hdr
> - *   The pointer to the contiguous IPv4 header.
> - * @param l4_hdr
> - *   The pointer to the beginning of the L4 header.
> - * @return
> - *   The complemented checksum to set in the IP packet.
> + * @internal Calculate the non-complemented IPv4 L4 checksum
>   */
>  static inline uint16_t
> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>  {
>  	uint32_t cksum;
>  	uint32_t l3_len, l4_len;
> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
>  
>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
> +
> +	return (uint16_t)cksum;
> +}
> +
> +/**
> + * Process the IPv4 UDP or TCP checksum.
> + *
> + * The IP and layer 4 checksum must be set to 0 in the packet by
> + * the caller.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   The complemented checksum to set in the IP packet.
> + */
> +static inline uint16_t
> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	cksum = ~cksum;
> +
>  	/*
> -	 * Per RFC 768:If the computed checksum is zero for UDP,
> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
>  	 * (the equivalent in one's complement arithmetic).
>  	 */
>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>  		cksum = 0xffff;
>  
> -	return (uint16_t)cksum;
> +	return cksum;
> +}
> +
> +/**
> + * Validate the IPv4 UDP or TCP checksum.
> + *
> + * @param ipv4_hdr
> + *   The pointer to the contiguous IPv4 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> +
> +	if (cksum != 0xffff)
> +		return -1;
> +
> +	return 0;

There is behavior change in tap verify, I am asking just to verify if expected,

Previously for UDP, if calculated checksum is '0', the 'rte_ipv4_udptcp_cksum()'
returns 0xFFFF.
And 0xFFFF is taken as good checksum by tap PMD.

With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it will
be taken as bad checksum.

I don't know if calculated checksum with valid checksum in place can return 0.


Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion (cksum =
~cksum;) seems changing pass/fail status of the checksum, unless I am not
missing anything here.


>  }
>  
>  /**
> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>  	return __rte_raw_cksum_reduce(sum);
>  }
>  
> +/**
> + * @internal Calculate the non-complemented IPv4 L4 checksum
> + */
> +static inline uint16_t
> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
> +{
> +	uint32_t cksum;
> +	uint32_t l4_len;
> +
> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +
> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +
> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> +
> +	return (uint16_t)cksum;
> +}
> +
>  /**
>   * Process the IPv6 UDP or TCP checksum.
>   *
> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>  static inline uint16_t
>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>  {
> -	uint32_t cksum;
> -	uint32_t l4_len;
> -
> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
>  
> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> +	cksum = ~cksum;
>  
> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> -	cksum = (~cksum) & 0xffff;
>  	/*
>  	 * Per RFC 768: If the computed checksum is zero for UDP,
>  	 * it is transmitted as all ones
> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
>  		cksum = 0xffff;
>  
> -	return (uint16_t)cksum;
> +	return cksum;
> +}
> +
> +/**
> + * Validate the IPv6 UDP or TCP checksum.
> + *
> + * The function accepts a 0 checksum, since it can exceptionally happen. See 8.1
> + * (Upper-Layer Checksums) in RFC 8200.
> + *
> + * @param ipv6_hdr
> + *   The pointer to the contiguous IPv6 header.
> + * @param l4_hdr
> + *   The pointer to the beginning of the L4 header.
> + * @return
> + *   Return 0 if the checksum is correct, else -1.
> + */
> +__rte_experimental
> +static inline int
> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
> +			     const void *l4_hdr)
> +{
> +	uint16_t cksum;
> +
> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> +	if (cksum != 0xffff)
> +		return -1;
> +
> +	return 0;
>  }

Nitpicking but, 'rte_ipv4_udptcp_cksum_verify()' is almost same with this
function ('rte_ipv6_udptcp_cksum_verify()') but they have different line
spacing, can be good to have similar syntax for both.
  
Andrew Rybchenko June 8, 2021, 10:23 a.m. UTC | #6
On 4/30/21 6:42 PM, Ferruh Yigit wrote:
> On 4/27/2021 2:57 PM, Olivier Matz wrote:
>> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
>> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
>> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
>> verify a packet containing a valid checksum.
>>
>> Since these functions should be used to calculate the checksum to set in
>> a packet, introduce 2 new helpers for checksum verification. They return
>> 0 if the checksum is valid in the packet.
>>
>> Use this new helper in net/tap driver.
>>
>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>> ---
>>  drivers/net/tap/rte_eth_tap.c |   7 +-
>>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>>  2 files changed, 104 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>> index 71282e8065..b14d5a1d55 100644
>> --- a/drivers/net/tap/rte_eth_tap.c
>> +++ b/drivers/net/tap/rte_eth_tap.c
>> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>>  					return;
>>  				}
>>  			}
>> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
>> +								 l4_hdr);
>>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
>> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
>> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
>> +								 l4_hdr);
>>  		}
>> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>>  		mbuf->ol_flags |= cksum_ok ?
>>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>>  	}
>> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
>> index 8c189009b0..ef84bcc5bf 100644
>> --- a/lib/net/rte_ip.h
>> +++ b/lib/net/rte_ip.h
>> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
>>  }
>>  
>>  /**
>> - * Process the IPv4 UDP or TCP checksum.
>> - *
>> - * The IP and layer 4 checksum must be set to 0 in the packet by
>> - * the caller.
>> - *
>> - * @param ipv4_hdr
>> - *   The pointer to the contiguous IPv4 header.
>> - * @param l4_hdr
>> - *   The pointer to the beginning of the L4 header.
>> - * @return
>> - *   The complemented checksum to set in the IP packet.
>> + * @internal Calculate the non-complemented IPv4 L4 checksum
>>   */
>>  static inline uint16_t
>> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>  {
>>  	uint32_t cksum;
>>  	uint32_t l3_len, l4_len;
>> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
>>  
>>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>> -	cksum = (~cksum) & 0xffff;
>> +
>> +	return (uint16_t)cksum;
>> +}
>> +
>> +/**
>> + * Process the IPv4 UDP or TCP checksum.
>> + *
>> + * The IP and layer 4 checksum must be set to 0 in the packet by
>> + * the caller.
>> + *
>> + * @param ipv4_hdr
>> + *   The pointer to the contiguous IPv4 header.
>> + * @param l4_hdr
>> + *   The pointer to the beginning of the L4 header.
>> + * @return
>> + *   The complemented checksum to set in the IP packet.
>> + */
>> +static inline uint16_t
>> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>> +{
>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>> +
>> +	cksum = ~cksum;
>> +
>>  	/*
>> -	 * Per RFC 768:If the computed checksum is zero for UDP,
>> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>>  	 * it is transmitted as all ones
>>  	 * (the equivalent in one's complement arithmetic).
>>  	 */
>>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>>  		cksum = 0xffff;
>>  
>> -	return (uint16_t)cksum;
>> +	return cksum;
>> +}
>> +
>> +/**
>> + * Validate the IPv4 UDP or TCP checksum.
>> + *
>> + * @param ipv4_hdr
>> + *   The pointer to the contiguous IPv4 header.
>> + * @param l4_hdr
>> + *   The pointer to the beginning of the L4 header.
>> + * @return
>> + *   Return 0 if the checksum is correct, else -1.
>> + */
>> +__rte_experimental
>> +static inline int
>> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
>> +			     const void *l4_hdr)
>> +{
>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>> +
>> +	if (cksum != 0xffff)
>> +		return -1;
>> +
>> +	return 0;
> 
> There is behavior change in tap verify, I am asking just to verify if expected,
> 
> Previously for UDP, if calculated checksum is '0', the 'rte_ipv4_udptcp_cksum()'
> returns 0xFFFF.
> And 0xFFFF is taken as good checksum by tap PMD.
> 
> With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it will
> be taken as bad checksum.
> 
> I don't know if calculated checksum with valid checksum in place can return 0.
> 
> 
> Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion (cksum =
> ~cksum;) seems changing pass/fail status of the checksum, unless I am not
> missing anything here.

Yes, it looks suspicious to me as well.

Olivier, could you clarify, please.

>>  }
>>  
>>  /**
>> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>>  	return __rte_raw_cksum_reduce(sum);
>>  }
>>  
>> +/**
>> + * @internal Calculate the non-complemented IPv4 L4 checksum
>> + */
>> +static inline uint16_t
>> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>> +{
>> +	uint32_t cksum;
>> +	uint32_t l4_len;
>> +
>> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
>> +
>> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
>> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
>> +
>> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>> +
>> +	return (uint16_t)cksum;
>> +}
>> +
>>  /**
>>   * Process the IPv6 UDP or TCP checksum.
>>   *
>> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>>  static inline uint16_t
>>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>>  {
>> -	uint32_t cksum;
>> -	uint32_t l4_len;
>> -
>> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
>> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
>>  
>> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
>> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
>> +	cksum = ~cksum;
>>  
>> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>> -	cksum = (~cksum) & 0xffff;
>>  	/*
>>  	 * Per RFC 768: If the computed checksum is zero for UDP,
>>  	 * it is transmitted as all ones
>> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
>>  		cksum = 0xffff;
>>  
>> -	return (uint16_t)cksum;
>> +	return cksum;
>> +}
>> +
>> +/**
>> + * Validate the IPv6 UDP or TCP checksum.
>> + *
>> + * The function accepts a 0 checksum, since it can exceptionally happen. See 8.1
>> + * (Upper-Layer Checksums) in RFC 8200.
>> + *
>> + * @param ipv6_hdr
>> + *   The pointer to the contiguous IPv6 header.
>> + * @param l4_hdr
>> + *   The pointer to the beginning of the L4 header.
>> + * @return
>> + *   Return 0 if the checksum is correct, else -1.
>> + */
>> +__rte_experimental
>> +static inline int
>> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
>> +			     const void *l4_hdr)
>> +{
>> +	uint16_t cksum;
>> +
>> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
>> +	if (cksum != 0xffff)
>> +		return -1;
>> +
>> +	return 0;
>>  }
> 
> Nitpicking but, 'rte_ipv4_udptcp_cksum_verify()' is almost same with this
> function ('rte_ipv6_udptcp_cksum_verify()') but they have different line
> spacing, can be good to have similar syntax for both.
>
  
Olivier Matz June 8, 2021, 12:29 p.m. UTC | #7
Hi Ferruh, Andrew,

On Tue, Jun 08, 2021 at 01:23:33PM +0300, Andrew Rybchenko wrote:
> On 4/30/21 6:42 PM, Ferruh Yigit wrote:
> > On 4/27/2021 2:57 PM, Olivier Matz wrote:
> >> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
> >> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
> >> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
> >> verify a packet containing a valid checksum.
> >>
> >> Since these functions should be used to calculate the checksum to set in
> >> a packet, introduce 2 new helpers for checksum verification. They return
> >> 0 if the checksum is valid in the packet.
> >>
> >> Use this new helper in net/tap driver.
> >>
> >> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> >> ---
> >>  drivers/net/tap/rte_eth_tap.c |   7 +-
> >>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
> >>  2 files changed, 104 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> >> index 71282e8065..b14d5a1d55 100644
> >> --- a/drivers/net/tap/rte_eth_tap.c
> >> +++ b/drivers/net/tap/rte_eth_tap.c
> >> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
> >>  					return;
> >>  				}
> >>  			}
> >> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
> >> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
> >> +								 l4_hdr);
> >>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
> >> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
> >> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
> >> +								 l4_hdr);
> >>  		}
> >> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
> >>  		mbuf->ol_flags |= cksum_ok ?
> >>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
> >>  	}
> >> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
> >> index 8c189009b0..ef84bcc5bf 100644
> >> --- a/lib/net/rte_ip.h
> >> +++ b/lib/net/rte_ip.h
> >> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
> >>  }
> >>  
> >>  /**
> >> - * Process the IPv4 UDP or TCP checksum.
> >> - *
> >> - * The IP and layer 4 checksum must be set to 0 in the packet by
> >> - * the caller.
> >> - *
> >> - * @param ipv4_hdr
> >> - *   The pointer to the contiguous IPv4 header.
> >> - * @param l4_hdr
> >> - *   The pointer to the beginning of the L4 header.
> >> - * @return
> >> - *   The complemented checksum to set in the IP packet.
> >> + * @internal Calculate the non-complemented IPv4 L4 checksum
> >>   */
> >>  static inline uint16_t
> >> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> >> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> >>  {
> >>  	uint32_t cksum;
> >>  	uint32_t l3_len, l4_len;
> >> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> >>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
> >>  
> >>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> >> -	cksum = (~cksum) & 0xffff;
> >> +
> >> +	return (uint16_t)cksum;
> >> +}
> >> +
> >> +/**
> >> + * Process the IPv4 UDP or TCP checksum.
> >> + *
> >> + * The IP and layer 4 checksum must be set to 0 in the packet by
> >> + * the caller.
> >> + *
> >> + * @param ipv4_hdr
> >> + *   The pointer to the contiguous IPv4 header.
> >> + * @param l4_hdr
> >> + *   The pointer to the beginning of the L4 header.
> >> + * @return
> >> + *   The complemented checksum to set in the IP packet.
> >> + */
> >> +static inline uint16_t
> >> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
> >> +{
> >> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> >> +
> >> +	cksum = ~cksum;
> >> +
> >>  	/*
> >> -	 * Per RFC 768:If the computed checksum is zero for UDP,
> >> +	 * Per RFC 768: If the computed checksum is zero for UDP,
> >>  	 * it is transmitted as all ones
> >>  	 * (the equivalent in one's complement arithmetic).
> >>  	 */
> >>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
> >>  		cksum = 0xffff;
> >>  
> >> -	return (uint16_t)cksum;
> >> +	return cksum;
> >> +}
> >> +
> >> +/**
> >> + * Validate the IPv4 UDP or TCP checksum.
> >> + *
> >> + * @param ipv4_hdr
> >> + *   The pointer to the contiguous IPv4 header.
> >> + * @param l4_hdr
> >> + *   The pointer to the beginning of the L4 header.
> >> + * @return
> >> + *   Return 0 if the checksum is correct, else -1.
> >> + */
> >> +__rte_experimental
> >> +static inline int
> >> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
> >> +			     const void *l4_hdr)
> >> +{
> >> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
> >> +
> >> +	if (cksum != 0xffff)
> >> +		return -1;
> >> +
> >> +	return 0;
> > 
> > There is behavior change in tap verify, I am asking just to verify if expected,
> > 
> > Previously for UDP, if calculated checksum is '0', the 'rte_ipv4_udptcp_cksum()'
> > returns 0xFFFF.
> > And 0xFFFF is taken as good checksum by tap PMD.

rte_ipv4_udptcp_cksum() cannot return 0xFFFF: this is only possible if
all data is 0. Before verifying a udp packet, the user must check that
it is not 0 (which means no checksum). In tcp, "Data offset" is never
0. Moreover, port=0 is a reserved value for both udp and tcp.

> > With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it will
> > be taken as bad checksum.
> > 
> > I don't know if calculated checksum with valid checksum in place can return 0.
> > 
> > 
> > Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion (cksum =
> > ~cksum;) seems changing pass/fail status of the checksum, unless I am not
> > missing anything here.
> 
> Yes, it looks suspicious to me as well.
> 
> Olivier, could you clarify, please.

Before commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
the behavior was:

  // rte_ipv4_udptcp_cksum() is 0xffff if checksum is valid
  // so cksum is 0 if checksum is valid
  cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
  // ol_flags is set to PKT_RX_L4_CKSUM_GOOD if checksum is valid
  mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;

After commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
it is broken:

  // rte_ipv4_udptcp_cksum() is 0 (tcp) or 0xffff (udp) if checksum is valid
  // so cksum is 0xffff (tcp) or 0 (udp) if checksum is valid
  cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
  // ol_flags is set to BAD (tcp) or GOOD (udp) if checksum is valid
  mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;

After patch 2/4 ("net/tap: fix Rx cksum flags on TCP packets"), the
correct behavior is restored:

  // cksum is 0 (tcp) or 0xffff (udp) if checksum is valid
  // note: 0xffff for tcp cannot happen (there is at least 1 bit set in the header)
  // note: 0 for udp cannot happen (it is replaced by in rte_ipv4_udptcp_cksum())
  cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
  // cksum_ok is 1 if checksum is valid
  cksum_ok = (cksum == 0) || (cksum == 0xffff);
  // ol_flags is set to GOOD if checksum is valid
  mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;

After this patch [3/4] ("net: introduce functions to verify L4 checksums"),
it is simplified by using rte_ipv4_udptcp_cksum_verify():

  // cksum_ok is 1 if checksum is valid
  cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, l4_hdr);
  // ol_flags is set to GOOD if checksum is valid
  mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;


Olivier

> 
> >>  }
> >>  
> >>  /**
> >> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
> >>  	return __rte_raw_cksum_reduce(sum);
> >>  }
> >>  
> >> +/**
> >> + * @internal Calculate the non-complemented IPv4 L4 checksum
> >> + */
> >> +static inline uint16_t
> >> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
> >> +{
> >> +	uint32_t cksum;
> >> +	uint32_t l4_len;
> >> +
> >> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> >> +
> >> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
> >> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> >> +
> >> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> >> +
> >> +	return (uint16_t)cksum;
> >> +}
> >> +
> >>  /**
> >>   * Process the IPv6 UDP or TCP checksum.
> >>   *
> >> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
> >>  static inline uint16_t
> >>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
> >>  {
> >> -	uint32_t cksum;
> >> -	uint32_t l4_len;
> >> -
> >> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
> >> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> >>  
> >> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
> >> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
> >> +	cksum = ~cksum;
> >>  
> >> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
> >> -	cksum = (~cksum) & 0xffff;
> >>  	/*
> >>  	 * Per RFC 768: If the computed checksum is zero for UDP,
> >>  	 * it is transmitted as all ones
> >> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
> >>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
> >>  		cksum = 0xffff;
> >>  
> >> -	return (uint16_t)cksum;
> >> +	return cksum;
> >> +}
> >> +
> >> +/**
> >> + * Validate the IPv6 UDP or TCP checksum.
> >> + *
> >> + * The function accepts a 0 checksum, since it can exceptionally happen. See 8.1
> >> + * (Upper-Layer Checksums) in RFC 8200.
> >> + *
> >> + * @param ipv6_hdr
> >> + *   The pointer to the contiguous IPv6 header.
> >> + * @param l4_hdr
> >> + *   The pointer to the beginning of the L4 header.
> >> + * @return
> >> + *   Return 0 if the checksum is correct, else -1.
> >> + */
> >> +__rte_experimental
> >> +static inline int
> >> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
> >> +			     const void *l4_hdr)
> >> +{
> >> +	uint16_t cksum;
> >> +
> >> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
> >> +	if (cksum != 0xffff)
> >> +		return -1;
> >> +
> >> +	return 0;
> >>  }
> > 
> > Nitpicking but, 'rte_ipv4_udptcp_cksum_verify()' is almost same with this
> > function ('rte_ipv6_udptcp_cksum_verify()') but they have different line
> > spacing, can be good to have similar syntax for both.
> > 
>
  
Andrew Rybchenko June 8, 2021, 12:39 p.m. UTC | #8
On 6/8/21 3:29 PM, Olivier Matz wrote:
> Hi Ferruh, Andrew,
> 
> On Tue, Jun 08, 2021 at 01:23:33PM +0300, Andrew Rybchenko wrote:
>> On 4/30/21 6:42 PM, Ferruh Yigit wrote:
>>> On 4/27/2021 2:57 PM, Olivier Matz wrote:
>>>> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
>>>> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
>>>> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
>>>> verify a packet containing a valid checksum.
>>>>
>>>> Since these functions should be used to calculate the checksum to set in
>>>> a packet, introduce 2 new helpers for checksum verification. They return
>>>> 0 if the checksum is valid in the packet.
>>>>
>>>> Use this new helper in net/tap driver.
>>>>
>>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>>>> ---
>>>>  drivers/net/tap/rte_eth_tap.c |   7 +-
>>>>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>>>>  2 files changed, 104 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>>>> index 71282e8065..b14d5a1d55 100644
>>>> --- a/drivers/net/tap/rte_eth_tap.c
>>>> +++ b/drivers/net/tap/rte_eth_tap.c
>>>> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>>>>  					return;
>>>>  				}
>>>>  			}
>>>> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>>>> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
>>>> +								 l4_hdr);
>>>>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
>>>> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
>>>> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
>>>> +								 l4_hdr);
>>>>  		}
>>>> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>>>>  		mbuf->ol_flags |= cksum_ok ?
>>>>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>>>>  	}
>>>> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
>>>> index 8c189009b0..ef84bcc5bf 100644
>>>> --- a/lib/net/rte_ip.h
>>>> +++ b/lib/net/rte_ip.h
>>>> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
>>>>  }
>>>>  
>>>>  /**
>>>> - * Process the IPv4 UDP or TCP checksum.
>>>> - *
>>>> - * The IP and layer 4 checksum must be set to 0 in the packet by
>>>> - * the caller.
>>>> - *
>>>> - * @param ipv4_hdr
>>>> - *   The pointer to the contiguous IPv4 header.
>>>> - * @param l4_hdr
>>>> - *   The pointer to the beginning of the L4 header.
>>>> - * @return
>>>> - *   The complemented checksum to set in the IP packet.
>>>> + * @internal Calculate the non-complemented IPv4 L4 checksum
>>>>   */
>>>>  static inline uint16_t
>>>> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>  {
>>>>  	uint32_t cksum;
>>>>  	uint32_t l3_len, l4_len;
>>>> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
>>>>  
>>>>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>>>> -	cksum = (~cksum) & 0xffff;
>>>> +
>>>> +	return (uint16_t)cksum;
>>>> +}
>>>> +
>>>> +/**
>>>> + * Process the IPv4 UDP or TCP checksum.
>>>> + *
>>>> + * The IP and layer 4 checksum must be set to 0 in the packet by
>>>> + * the caller.
>>>> + *
>>>> + * @param ipv4_hdr
>>>> + *   The pointer to the contiguous IPv4 header.
>>>> + * @param l4_hdr
>>>> + *   The pointer to the beginning of the L4 header.
>>>> + * @return
>>>> + *   The complemented checksum to set in the IP packet.
>>>> + */
>>>> +static inline uint16_t
>>>> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>> +{
>>>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>>>> +
>>>> +	cksum = ~cksum;
>>>> +
>>>>  	/*
>>>> -	 * Per RFC 768:If the computed checksum is zero for UDP,
>>>> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>>>>  	 * it is transmitted as all ones
>>>>  	 * (the equivalent in one's complement arithmetic).
>>>>  	 */
>>>>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>>>>  		cksum = 0xffff;
>>>>  
>>>> -	return (uint16_t)cksum;
>>>> +	return cksum;
>>>> +}
>>>> +
>>>> +/**
>>>> + * Validate the IPv4 UDP or TCP checksum.
>>>> + *
>>>> + * @param ipv4_hdr
>>>> + *   The pointer to the contiguous IPv4 header.
>>>> + * @param l4_hdr
>>>> + *   The pointer to the beginning of the L4 header.
>>>> + * @return
>>>> + *   Return 0 if the checksum is correct, else -1.
>>>> + */
>>>> +__rte_experimental
>>>> +static inline int
>>>> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
>>>> +			     const void *l4_hdr)
>>>> +{
>>>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>>>> +
>>>> +	if (cksum != 0xffff)
>>>> +		return -1;
>>>> +
>>>> +	return 0;
>>>
>>> There is behavior change in tap verify, I am asking just to verify if expected,
>>>
>>> Previously for UDP, if calculated checksum is '0', the 'rte_ipv4_udptcp_cksum()'
>>> returns 0xFFFF.
>>> And 0xFFFF is taken as good checksum by tap PMD.
> 
> rte_ipv4_udptcp_cksum() cannot return 0xFFFF: this is only possible if
> all data is 0. Before verifying a udp packet, the user must check that
> it is not 0 (which means no checksum). In tcp, "Data offset" is never
> 0. Moreover, port=0 is a reserved value for both udp and tcp.
> 
>>> With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it will
>>> be taken as bad checksum.
>>>
>>> I don't know if calculated checksum with valid checksum in place can return 0.
>>>
>>>
>>> Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion (cksum =
>>> ~cksum;) seems changing pass/fail status of the checksum, unless I am not
>>> missing anything here.
>>
>> Yes, it looks suspicious to me as well.
>>
>> Olivier, could you clarify, please.
> 
> Before commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
> the behavior was:
> 
>   // rte_ipv4_udptcp_cksum() is 0xffff if checksum is valid
>   // so cksum is 0 if checksum is valid
>   cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>   // ol_flags is set to PKT_RX_L4_CKSUM_GOOD if checksum is valid
>   mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
> 
> After commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
> it is broken:
> 
>   // rte_ipv4_udptcp_cksum() is 0 (tcp) or 0xffff (udp) if checksum is valid
>   // so cksum is 0xffff (tcp) or 0 (udp) if checksum is valid
>   cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>   // ol_flags is set to BAD (tcp) or GOOD (udp) if checksum is valid
>   mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
> 
> After patch 2/4 ("net/tap: fix Rx cksum flags on TCP packets"), the
> correct behavior is restored:
> 
>   // cksum is 0 (tcp) or 0xffff (udp) if checksum is valid
>   // note: 0xffff for tcp cannot happen (there is at least 1 bit set in the header)
>   // note: 0 for udp cannot happen (it is replaced by in rte_ipv4_udptcp_cksum())
>   cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>   // cksum_ok is 1 if checksum is valid
>   cksum_ok = (cksum == 0) || (cksum == 0xffff);
>   // ol_flags is set to GOOD if checksum is valid
>   mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
> 
> After this patch [3/4] ("net: introduce functions to verify L4 checksums"),
> it is simplified by using rte_ipv4_udptcp_cksum_verify():
> 
>   // cksum_ok is 1 if checksum is valid
>   cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, l4_hdr);
>   // ol_flags is set to GOOD if checksum is valid
>   mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
> 

Many thanks for the detailed explanations. It replies to all my
questions (even not asked, but kept in my head).

Andrew.

> Olivier
> 
>>
>>>>  }
>>>>  
>>>>  /**
>>>> @@ -448,6 +484,25 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>>>>  	return __rte_raw_cksum_reduce(sum);
>>>>  }
>>>>  
>>>> +/**
>>>> + * @internal Calculate the non-complemented IPv4 L4 checksum
>>>> + */
>>>> +static inline uint16_t
>>>> +__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>>>> +{
>>>> +	uint32_t cksum;
>>>> +	uint32_t l4_len;
>>>> +
>>>> +	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
>>>> +
>>>> +	cksum = rte_raw_cksum(l4_hdr, l4_len);
>>>> +	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
>>>> +
>>>> +	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>>>> +
>>>> +	return (uint16_t)cksum;
>>>> +}
>>>> +
>>>>  /**
>>>>   * Process the IPv6 UDP or TCP checksum.
>>>>   *
>>>> @@ -464,16 +519,10 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
>>>>  static inline uint16_t
>>>>  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>>>>  {
>>>> -	uint32_t cksum;
>>>> -	uint32_t l4_len;
>>>> -
>>>> -	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
>>>> +	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
>>>>  
>>>> -	cksum = rte_raw_cksum(l4_hdr, l4_len);
>>>> -	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
>>>> +	cksum = ~cksum;
>>>>  
>>>> -	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>>>> -	cksum = (~cksum) & 0xffff;
>>>>  	/*
>>>>  	 * Per RFC 768: If the computed checksum is zero for UDP,
>>>>  	 * it is transmitted as all ones
>>>> @@ -482,7 +531,34 @@ rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
>>>>  	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
>>>>  		cksum = 0xffff;
>>>>  
>>>> -	return (uint16_t)cksum;
>>>> +	return cksum;
>>>> +}
>>>> +
>>>> +/**
>>>> + * Validate the IPv6 UDP or TCP checksum.
>>>> + *
>>>> + * The function accepts a 0 checksum, since it can exceptionally happen. See 8.1
>>>> + * (Upper-Layer Checksums) in RFC 8200.
>>>> + *
>>>> + * @param ipv6_hdr
>>>> + *   The pointer to the contiguous IPv6 header.
>>>> + * @param l4_hdr
>>>> + *   The pointer to the beginning of the L4 header.
>>>> + * @return
>>>> + *   Return 0 if the checksum is correct, else -1.
>>>> + */
>>>> +__rte_experimental
>>>> +static inline int
>>>> +rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
>>>> +			     const void *l4_hdr)
>>>> +{
>>>> +	uint16_t cksum;
>>>> +
>>>> +	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
>>>> +	if (cksum != 0xffff)
>>>> +		return -1;
>>>> +
>>>> +	return 0;
>>>>  }
>>>
>>> Nitpicking but, 'rte_ipv4_udptcp_cksum_verify()' is almost same with this
>>> function ('rte_ipv6_udptcp_cksum_verify()') but they have different line
>>> spacing, can be good to have similar syntax for both.
>>>
>>
  
Ferruh Yigit June 25, 2021, 3:38 p.m. UTC | #9
On 6/8/2021 1:39 PM, Andrew Rybchenko wrote:
> On 6/8/21 3:29 PM, Olivier Matz wrote:
>> Hi Ferruh, Andrew,
>>
>> On Tue, Jun 08, 2021 at 01:23:33PM +0300, Andrew Rybchenko wrote:
>>> On 4/30/21 6:42 PM, Ferruh Yigit wrote:
>>>> On 4/27/2021 2:57 PM, Olivier Matz wrote:
>>>>> Since commit d5df2ae0428a ("net: fix unneeded replacement of TCP
>>>>> checksum 0"), the functions rte_ipv4_udptcp_cksum() and
>>>>> rte_ipv6_udptcp_cksum() can return either 0x0000 or 0xffff when used to
>>>>> verify a packet containing a valid checksum.
>>>>>
>>>>> Since these functions should be used to calculate the checksum to set in
>>>>> a packet, introduce 2 new helpers for checksum verification. They return
>>>>> 0 if the checksum is valid in the packet.
>>>>>
>>>>> Use this new helper in net/tap driver.
>>>>>
>>>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>>>>> ---
>>>>>  drivers/net/tap/rte_eth_tap.c |   7 +-
>>>>>  lib/net/rte_ip.h              | 124 +++++++++++++++++++++++++++-------
>>>>>  2 files changed, 104 insertions(+), 27 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>>>>> index 71282e8065..b14d5a1d55 100644
>>>>> --- a/drivers/net/tap/rte_eth_tap.c
>>>>> +++ b/drivers/net/tap/rte_eth_tap.c
>>>>> @@ -365,11 +365,12 @@ tap_verify_csum(struct rte_mbuf *mbuf)
>>>>>  					return;
>>>>>  				}
>>>>>  			}
>>>>> -			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>>>>> +			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
>>>>> +								 l4_hdr);
>>>>>  		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
>>>>> -			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
>>>>> +			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
>>>>> +								 l4_hdr);
>>>>>  		}
>>>>> -		cksum_ok = (cksum == 0) || (cksum == 0xffff);
>>>>>  		mbuf->ol_flags |= cksum_ok ?
>>>>>  			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>>>>>  	}
>>>>> diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
>>>>> index 8c189009b0..ef84bcc5bf 100644
>>>>> --- a/lib/net/rte_ip.h
>>>>> +++ b/lib/net/rte_ip.h
>>>>> @@ -344,20 +344,10 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
>>>>>  }
>>>>>  
>>>>>  /**
>>>>> - * Process the IPv4 UDP or TCP checksum.
>>>>> - *
>>>>> - * The IP and layer 4 checksum must be set to 0 in the packet by
>>>>> - * the caller.
>>>>> - *
>>>>> - * @param ipv4_hdr
>>>>> - *   The pointer to the contiguous IPv4 header.
>>>>> - * @param l4_hdr
>>>>> - *   The pointer to the beginning of the L4 header.
>>>>> - * @return
>>>>> - *   The complemented checksum to set in the IP packet.
>>>>> + * @internal Calculate the non-complemented IPv4 L4 checksum
>>>>>   */
>>>>>  static inline uint16_t
>>>>> -rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>> +__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>>  {
>>>>>  	uint32_t cksum;
>>>>>  	uint32_t l3_len, l4_len;
>>>>> @@ -374,16 +364,62 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>>  	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
>>>>>  
>>>>>  	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
>>>>> -	cksum = (~cksum) & 0xffff;
>>>>> +
>>>>> +	return (uint16_t)cksum;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * Process the IPv4 UDP or TCP checksum.
>>>>> + *
>>>>> + * The IP and layer 4 checksum must be set to 0 in the packet by
>>>>> + * the caller.
>>>>> + *
>>>>> + * @param ipv4_hdr
>>>>> + *   The pointer to the contiguous IPv4 header.
>>>>> + * @param l4_hdr
>>>>> + *   The pointer to the beginning of the L4 header.
>>>>> + * @return
>>>>> + *   The complemented checksum to set in the IP packet.
>>>>> + */
>>>>> +static inline uint16_t
>>>>> +rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
>>>>> +{
>>>>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>>>>> +
>>>>> +	cksum = ~cksum;
>>>>> +
>>>>>  	/*
>>>>> -	 * Per RFC 768:If the computed checksum is zero for UDP,
>>>>> +	 * Per RFC 768: If the computed checksum is zero for UDP,
>>>>>  	 * it is transmitted as all ones
>>>>>  	 * (the equivalent in one's complement arithmetic).
>>>>>  	 */
>>>>>  	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
>>>>>  		cksum = 0xffff;
>>>>>  
>>>>> -	return (uint16_t)cksum;
>>>>> +	return cksum;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * Validate the IPv4 UDP or TCP checksum.
>>>>> + *
>>>>> + * @param ipv4_hdr
>>>>> + *   The pointer to the contiguous IPv4 header.
>>>>> + * @param l4_hdr
>>>>> + *   The pointer to the beginning of the L4 header.
>>>>> + * @return
>>>>> + *   Return 0 if the checksum is correct, else -1.
>>>>> + */
>>>>> +__rte_experimental
>>>>> +static inline int
>>>>> +rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
>>>>> +			     const void *l4_hdr)
>>>>> +{
>>>>> +	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
>>>>> +
>>>>> +	if (cksum != 0xffff)
>>>>> +		return -1;
>>>>> +
>>>>> +	return 0;
>>>>
>>>> There is behavior change in tap verify, I am asking just to verify if expected,
>>>>
>>>> Previously for UDP, if calculated checksum is '0', the 'rte_ipv4_udptcp_cksum()'
>>>> returns 0xFFFF.
>>>> And 0xFFFF is taken as good checksum by tap PMD.
>>
>> rte_ipv4_udptcp_cksum() cannot return 0xFFFF: this is only possible if
>> all data is 0. Before verifying a udp packet, the user must check that
>> it is not 0 (which means no checksum). In tcp, "Data offset" is never
>> 0. Moreover, port=0 is a reserved value for both udp and tcp.
>>
>>>> With new 'rte_ipv4_udptcp_cksum_verify()', if calculated checksum is '0' it will
>>>> be taken as bad checksum.
>>>>
>>>> I don't know if calculated checksum with valid checksum in place can return 0.
>>>>
>>>>
>>>> Also for TCP, 'rte_ipv4_udptcp_cksum_verify()' doesn't have inversion (cksum =
>>>> ~cksum;) seems changing pass/fail status of the checksum, unless I am not
>>>> missing anything here.
>>>
>>> Yes, it looks suspicious to me as well.
>>>
>>> Olivier, could you clarify, please.
>>
>> Before commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
>> the behavior was:
>>
>>   // rte_ipv4_udptcp_cksum() is 0xffff if checksum is valid
>>   // so cksum is 0 if checksum is valid
>>   cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>>   // ol_flags is set to PKT_RX_L4_CKSUM_GOOD if checksum is valid
>>   mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
>>
>> After commit d5df2ae0428a ("net: fix unneeded replacement of TCP checksum 0"),
>> it is broken:
>>
>>   // rte_ipv4_udptcp_cksum() is 0 (tcp) or 0xffff (udp) if checksum is valid
>>   // so cksum is 0xffff (tcp) or 0 (udp) if checksum is valid
>>   cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>>   // ol_flags is set to BAD (tcp) or GOOD (udp) if checksum is valid
>>   mbuf->ol_flags |= cksum ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
>>
>> After patch 2/4 ("net/tap: fix Rx cksum flags on TCP packets"), the
>> correct behavior is restored:
>>
>>   // cksum is 0 (tcp) or 0xffff (udp) if checksum is valid
>>   // note: 0xffff for tcp cannot happen (there is at least 1 bit set in the header)
>>   // note: 0 for udp cannot happen (it is replaced by in rte_ipv4_udptcp_cksum())
>>   cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
>>   // cksum_ok is 1 if checksum is valid
>>   cksum_ok = (cksum == 0) || (cksum == 0xffff);
>>   // ol_flags is set to GOOD if checksum is valid
>>   mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>>
>> After this patch [3/4] ("net: introduce functions to verify L4 checksums"),
>> it is simplified by using rte_ipv4_udptcp_cksum_verify():
>>
>>   // cksum_ok is 1 if checksum is valid
>>   cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr, l4_hdr);
>>   // ol_flags is set to GOOD if checksum is valid
>>   mbuf->ol_flags |= cksum_ok ? PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
>>
> 
> Many thanks for the detailed explanations. It replies to all my
> questions (even not asked, but kept in my head).
> 

Thanks for clarification, after checking again with help of description above,
it looks good to me.
  

Patch

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 71282e8065..b14d5a1d55 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -365,11 +365,12 @@  tap_verify_csum(struct rte_mbuf *mbuf)
 					return;
 				}
 			}
-			cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
+			cksum_ok = !rte_ipv4_udptcp_cksum_verify(l3_hdr,
+								 l4_hdr);
 		} else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */
-			cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
+			cksum_ok = !rte_ipv6_udptcp_cksum_verify(l3_hdr,
+								 l4_hdr);
 		}
-		cksum_ok = (cksum == 0) || (cksum == 0xffff);
 		mbuf->ol_flags |= cksum_ok ?
 			PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
 	}
diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
index 8c189009b0..ef84bcc5bf 100644
--- a/lib/net/rte_ip.h
+++ b/lib/net/rte_ip.h
@@ -344,20 +344,10 @@  rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
 }
 
 /**
- * Process the IPv4 UDP or TCP checksum.
- *
- * The IP and layer 4 checksum must be set to 0 in the packet by
- * the caller.
- *
- * @param ipv4_hdr
- *   The pointer to the contiguous IPv4 header.
- * @param l4_hdr
- *   The pointer to the beginning of the L4 header.
- * @return
- *   The complemented checksum to set in the IP packet.
+ * @internal Calculate the non-complemented IPv4 L4 checksum
  */
 static inline uint16_t
-rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
+__rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
 {
 	uint32_t cksum;
 	uint32_t l3_len, l4_len;
@@ -374,16 +364,62 @@  rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
 	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
 
 	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
-	cksum = (~cksum) & 0xffff;
+
+	return (uint16_t)cksum;
+}
+
+/**
+ * Process the IPv4 UDP or TCP checksum.
+ *
+ * The IP and layer 4 checksum must be set to 0 in the packet by
+ * the caller.
+ *
+ * @param ipv4_hdr
+ *   The pointer to the contiguous IPv4 header.
+ * @param l4_hdr
+ *   The pointer to the beginning of the L4 header.
+ * @return
+ *   The complemented checksum to set in the IP packet.
+ */
+static inline uint16_t
+rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const void *l4_hdr)
+{
+	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
+
+	cksum = ~cksum;
+
 	/*
-	 * Per RFC 768:If the computed checksum is zero for UDP,
+	 * Per RFC 768: If the computed checksum is zero for UDP,
 	 * it is transmitted as all ones
 	 * (the equivalent in one's complement arithmetic).
 	 */
 	if (cksum == 0 && ipv4_hdr->next_proto_id == IPPROTO_UDP)
 		cksum = 0xffff;
 
-	return (uint16_t)cksum;
+	return cksum;
+}
+
+/**
+ * Validate the IPv4 UDP or TCP checksum.
+ *
+ * @param ipv4_hdr
+ *   The pointer to the contiguous IPv4 header.
+ * @param l4_hdr
+ *   The pointer to the beginning of the L4 header.
+ * @return
+ *   Return 0 if the checksum is correct, else -1.
+ */
+__rte_experimental
+static inline int
+rte_ipv4_udptcp_cksum_verify(const struct rte_ipv4_hdr *ipv4_hdr,
+			     const void *l4_hdr)
+{
+	uint16_t cksum = __rte_ipv4_udptcp_cksum(ipv4_hdr, l4_hdr);
+
+	if (cksum != 0xffff)
+		return -1;
+
+	return 0;
 }
 
 /**
@@ -448,6 +484,25 @@  rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
 	return __rte_raw_cksum_reduce(sum);
 }
 
+/**
+ * @internal Calculate the non-complemented IPv4 L4 checksum
+ */
+static inline uint16_t
+__rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
+{
+	uint32_t cksum;
+	uint32_t l4_len;
+
+	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+
+	cksum = rte_raw_cksum(l4_hdr, l4_len);
+	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
+
+	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+
+	return (uint16_t)cksum;
+}
+
 /**
  * Process the IPv6 UDP or TCP checksum.
  *
@@ -464,16 +519,10 @@  rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
 static inline uint16_t
 rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
 {
-	uint32_t cksum;
-	uint32_t l4_len;
-
-	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+	uint16_t cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
 
-	cksum = rte_raw_cksum(l4_hdr, l4_len);
-	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
+	cksum = ~cksum;
 
-	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
-	cksum = (~cksum) & 0xffff;
 	/*
 	 * Per RFC 768: If the computed checksum is zero for UDP,
 	 * it is transmitted as all ones
@@ -482,7 +531,34 @@  rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr)
 	if (cksum == 0 && ipv6_hdr->proto == IPPROTO_UDP)
 		cksum = 0xffff;
 
-	return (uint16_t)cksum;
+	return cksum;
+}
+
+/**
+ * Validate the IPv6 UDP or TCP checksum.
+ *
+ * The function accepts a 0 checksum, since it can exceptionally happen. See 8.1
+ * (Upper-Layer Checksums) in RFC 8200.
+ *
+ * @param ipv6_hdr
+ *   The pointer to the contiguous IPv6 header.
+ * @param l4_hdr
+ *   The pointer to the beginning of the L4 header.
+ * @return
+ *   Return 0 if the checksum is correct, else -1.
+ */
+__rte_experimental
+static inline int
+rte_ipv6_udptcp_cksum_verify(const struct rte_ipv6_hdr *ipv6_hdr,
+			     const void *l4_hdr)
+{
+	uint16_t cksum;
+
+	cksum = __rte_ipv6_udptcp_cksum(ipv6_hdr, l4_hdr);
+	if (cksum != 0xffff)
+		return -1;
+
+	return 0;
 }
 
 /** IPv6 fragment extension header. */