[dpdk-dev] librte_net: fix TX checksum offload problem with IPv6 packet.

Message ID 1418298582-6953-1-git-send-email-konstantin.ananyev@intel.com (mailing list archive)
State Accepted, archived
Headers

Commit Message

Ananyev, Konstantin Dec. 11, 2014, 11:49 a.m. UTC
For rte_ipv6_phdr_cksum(), gcc 4.8.* with "-O3" does not always generate
correct code.
Sometimes it 'forgets' to put the len and proto fields of psd_header on the stack.
To overcome that problem and speed things up a bit, rte_raw_cksum() is refactored
so that the IPv6 pseudo-header checksum calculation is split into 3 phases:
1. calculate the sum for the src & dst addresses.
2. add the sum for proto & len.
3. finalise the sum.
That makes gcc generate valid code and helps to avoid any copying.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 lib/librte_net/rte_ip.h | 67 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 15 deletions(-)
  

Comments

Olivier Matz Dec. 11, 2014, 12:33 p.m. UTC | #1
Hi Konstantin,

On 12/11/2014 12:49 PM, Konstantin Ananyev wrote:
> For rte_ipv6_phdr_cksum(), gcc 4.8.* with "-O3" does not always generate
> correct code.
> Sometimes it 'forgets' to put the len and proto fields of psd_header on the stack.
> To overcome that problem and speed things up a bit, rte_raw_cksum() is refactored
> so that the IPv6 pseudo-header checksum calculation is split into 3 phases:
> 1. calculate the sum for the src & dst addresses.
> 2. add the sum for proto & len.
> 3. finalise the sum.
> That makes gcc generate valid code and helps to avoid any copying.
>
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>
  
Thomas Monjalon Dec. 16, 2014, 11:56 p.m. UTC | #2
> > For rte_ipv6_phdr_cksum(), gcc 4.8.* with "-O3" does not always generate
> > correct code.
> > Sometimes it 'forgets' to put the len and proto fields of psd_header on the stack.
> > To overcome that problem and speed things up a bit, rte_raw_cksum() is refactored
> > so that the IPv6 pseudo-header checksum calculation is split into 3 phases:
> > 1. calculate the sum for the src & dst addresses.
> > 2. add the sum for proto & len.
> > 3. finalise the sum.
> > That makes gcc generate valid code and helps to avoid any copying.
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
> Acked-by: Olivier Matz <olivier.matz@6wind.com>

Applied

Thanks
  

Patch

diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 46f0497..f0ec543 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -142,22 +142,24 @@  struct ipv4_hdr {
 	((x) >= IPV4_MIN_MCAST && (x) <= IPV4_MAX_MCAST) /**< check if IPv4 address is multicast */
 
 /**
- * Process the non-complemented checksum of a buffer.
+ * @internal Calculate a sum of all words in the buffer.
+ * Helper routine for the rte_raw_cksum().
  *
  * @param buf
  *   Pointer to the buffer.
  * @param len
  *   Length of the buffer.
+ * @param sum
+ *   Initial value of the sum.
  * @return
- *   The non-complemented checksum.
+ *   sum += Sum of all words in the buffer.
  */
-static inline uint16_t
-rte_raw_cksum(const char *buf, size_t len)
+static inline uint32_t
+__rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
 {
 	/* workaround gcc strict-aliasing warning */
 	uintptr_t ptr = (uintptr_t)buf;
 	const uint16_t *u16 = (const uint16_t *)ptr;
-	uint32_t sum = 0;
 
 	while (len >= (sizeof(*u16) * 4)) {
 		sum += u16[0];
@@ -177,12 +179,46 @@  rte_raw_cksum(const char *buf, size_t len)
 	if (len == 1)
 		sum += *((const uint8_t *)u16);
 
+	return sum;
+}
+
+/**
+ * @internal Reduce a sum to the non-complemented checksum.
+ * Helper routine for the rte_raw_cksum().
+ *
+ * @param sum
+ *   Value of the sum.
+ * @return
+ *   The non-complemented checksum.
+ */
+static inline uint16_t
+__rte_raw_cksum_reduce(uint32_t sum)
+{
 	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
 	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
 	return (uint16_t)sum;
 }
 
 /**
+ * Process the non-complemented checksum of a buffer.
+ *
+ * @param buf
+ *   Pointer to the buffer.
+ * @param len
+ *   Length of the buffer.
+ * @return
+ *   The non-complemented checksum.
+ */
+static inline uint16_t
+rte_raw_cksum(const void *buf, size_t len)
+{
+	uint32_t sum;
+
+	sum = __rte_raw_cksum(buf, len, 0);
+	return __rte_raw_cksum_reduce(sum);
+}
+
+/**
  * Process the IPv4 checksum of an IPv4 header.
  *
  * The checksum field must be set to 0 by the caller.
@@ -196,7 +232,7 @@  static inline uint16_t
 rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr)
 {
 	uint16_t cksum;
-	cksum = rte_raw_cksum((const char *)ipv4_hdr, sizeof(struct ipv4_hdr));
+	cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr));
 	return ((cksum == 0xffff) ? cksum : ~cksum);
 }
 
@@ -240,7 +276,7 @@  rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
 			(uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length)
 				- sizeof(struct ipv4_hdr)));
 	}
-	return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr));
+	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
 }
 
 /**
@@ -307,15 +343,12 @@  struct ipv6_hdr {
 static inline uint16_t
 rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
 {
-	struct ipv6_psd_header {
-		uint8_t src_addr[16]; /* IP address of source host. */
-		uint8_t dst_addr[16]; /* IP address of destination host. */
-		uint32_t len;         /* L4 length. */
-		uint32_t proto;       /* L4 protocol - top 3 bytes must be zero */
+	uint32_t sum;
+	struct {
+		uint32_t len;   /* L4 length. */
+		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
 	} psd_hdr;
 
-	rte_memcpy(&psd_hdr.src_addr, ipv6_hdr->src_addr,
-		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr));
 	psd_hdr.proto = (ipv6_hdr->proto << 24);
 	if (ol_flags & PKT_TX_TCP_SEG) {
 		psd_hdr.len = 0;
@@ -323,7 +356,11 @@  rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
 		psd_hdr.len = ipv6_hdr->payload_len;
 	}
 
-	return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr));
+	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
+		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
+		0);
+	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
+	return __rte_raw_cksum_reduce(sum);
 }
 
 /**