ip_frag: support IPv6 reassembly with extensions

Message ID 20240826112328.3028488-1-vignesh.purushotham.srinivas@ericsson.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series ip_frag: support IPv6 reassembly with extensions |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-compile-arm64-testing pending Testing pending
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-unit-arm64-testing pending Testing pending
ci/github-robot: build success github build: passed
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Vignesh Purushotham Srinivas Aug. 26, 2024, 11:23 a.m. UTC
From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>

Add support to ip_frag library to perform IPv6 reassembly
when extension headers are present before the fragment
extension in the packet.

Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
---
 .mailmap                          |  1 +
 lib/ip_frag/ip_frag_common.h      |  2 +
 lib/ip_frag/ip_reassembly.h       |  2 +
 lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
 4 files changed, 64 insertions(+), 9 deletions(-)
  

Comments

Stephen Hemminger Aug. 26, 2024, 3:41 p.m. UTC | #1
On Mon, 26 Aug 2024 13:23:28 +0200
<vignesh.purushotham.srinivas@ericsson.com> wrote:

> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };

This creates a 32 bit hole in the structure.
Better to put next_proto after the start field.

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {

I would break up this loop condition for clarity.
Something like:

	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
		if (next_proto < 0)
			break

Also, this needs new test cases.
  
Konstantin Ananyev Sept. 17, 2024, 5:57 p.m. UTC | #2
> 
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> >  	uint32_t total_size;                   /* expected reassembled size */
> >  	uint32_t frag_size;                    /* size of fragments received */
> >  	uint32_t last_idx;                     /* index of next entry to fill */
> > +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> > +	uint8_t *next_proto;                   /* pointer of the next_proto field */
> >  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> >  };
> 
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.

Another alternative - use an offset within the mbuf instead of a pointer.

> 
> > +
> > +	while (next_proto != IPPROTO_FRAGMENT &&
> > +		num_exts < MAX_NUM_IPV6_EXTS &&
> > +		(next_proto = rte_ipv6_get_next_ext(
> > +		*last_ext, next_proto, &ext_len)) >= 0) {
> 
> I would break up this loop condition for clarity.

+ 1

> Something like:
> 
> 	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
> 		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
> 		if (next_proto < 0)
> 			break
> 
> Also, this needs new test cases.

Agree, that would be good thing to add.
  
Konstantin Ananyev Sept. 17, 2024, 6:07 p.m. UTC | #3
> From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> 
> Add support to ip_frag library to perform IPv6 reassembly
> when extension headers are present before the fragment
> extension in the packet.
> 
> Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> ---
>  .mailmap                          |  1 +
>  lib/ip_frag/ip_frag_common.h      |  2 +
>  lib/ip_frag/ip_reassembly.h       |  2 +
>  lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
>  4 files changed, 64 insertions(+), 9 deletions(-)
> 
> diff --git a/.mailmap b/.mailmap
> index 4a508bafad..69b229a5b7 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
>  Victor Kaplansky <victork@redhat.com>
>  Victor Raj <victor.raj@intel.com>
>  Vidya Sagar Velumuri <vvelumuri@marvell.com>
> +Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
>  Vignesh Sridhar <vignesh.sridhar@intel.com>
>  Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
>  Vijaya Mohan Guvva <vijay1054@gmail.com>
> diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
> index 51fc9d47fb..db2665e846 100644
> --- a/lib/ip_frag/ip_frag_common.h
> +++ b/lib/ip_frag/ip_frag_common.h
> @@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
>  	fp->total_size = UINT32_MAX;
>  	fp->frag_size = 0;
>  	fp->last_idx = IP_MIN_FRAG_NUM;
> +	fp->exts_len = 0;
> +	fp->next_proto = NULL;
>  	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
>  	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
>  }
> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };
> 
> diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
> index 88863a98d1..8decf592a6 100644
> --- a/lib/ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/ip_frag/rte_ipv6_reassembly.c
> @@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	/* update ipv6 header for the reassembled datagram */
>  	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
> 
> +	payload_len += fp->exts_len;
>  	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
> 
>  	/*
>  	 * remove fragmentation header. note that per RFC2460, we need to update
>  	 * the last non-fragmentable header with the "next header" field to contain
> -	 * type of the first fragmentable header, but we currently don't support
> -	 * other headers, so we assume there are no other headers and thus update
> -	 * the main IPv6 header instead.
> +	 * type of the first fragmentable header.
>  	 */
> -	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
> -	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
> -	ip_hdr->proto = frag_hdr->next_header;
> +	frag_hdr = (struct rte_ipv6_fragment_ext *)
> +		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
> +	*fp->next_proto = frag_hdr->next_header;
> 
> +	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
>  	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
>  			rte_pktmbuf_mtod(m, char*), move_len);
> 
> @@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	return m;
>  }
> 
> +/*
> + * Function to crawl through the extension header stack.
> + * This function breaks as soon a the fragment header is
> + * found and returns the total length the traversed exts
> + * and the last extension before the fragment header
> + */
> +static inline uint32_t
> +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> +{
> +	uint32_t total_len = 0;
> +	uint8_t num_exts = 0;
> +	size_t ext_len = 0;
> +	*last_ext = (uint8_t *)(ip_hdr + 1);
> +	int next_proto = ip_hdr->proto;
> +#define MAX_NUM_IPV6_EXTS 8

As a nit - let's keep the coding style consistent:
Please move the #define outside the function definition.

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {
> +
> +		total_len += ext_len;
> +
> +		if (next_proto == IPPROTO_FRAGMENT)
> +			return total_len;
> +
> +		*last_ext += ext_len;
> +		num_exts++;
> +	}

So if IPPROTO_FRAGMENT was not found, we just use extension #8 instead?
Shouldn't we return an error in that case, and probably drop the fragment?

> +	return total_len;
> +}
> +
>  /*
>   * Process new mbuf with fragment of IPV6 datagram.
>   * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
> @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  {
>  	struct ip_frag_pkt *fp;
>  	struct ip_frag_key key;
> +	uint8_t *last_ipv6_ext;
> +	uint32_t exts_len;
>  	uint16_t ip_ofs;
>  	int32_t ip_len;
>  	int32_t trim;
> @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/*
>  	 * as per RFC2460, payload length contains all extension headers
>  	 * as well.
> -	 * since we don't support anything but frag headers,
> -	 * this is what we remove from the payload len.
> +	 * so we remove the extension len from the payload len.
>  	 */
> -	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
> +	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> +	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);

Hmm..., as I remember, ip_len is what we want to preserve in the packet...
Why do we want to remove all previous ext headers here?

>  	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/* process the fragmented packet. */
>  	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
>  			MORE_FRAGS(frag_hdr->frag_data));

Can you explain why we are setting these new fp fields after 'ip_frag_process()'?
ip_frag_process() itself can call reassembly() - if all fragments are already in place.

> +
> +	/* store extension stack info, only for first fragment */
> +	if (ip_ofs == 0) {

If we want it for the first fragment only, why not invoke ip_frag_get_last_exthdr()
only when ip_ofs == 0?
 
> +		/*
> +		 * fp->next_proto points to either the IP's next header
> +		 * or th next header of the extension before the fragment
> +		 * extension
> +		 */
> +		fp->next_proto = (uint8_t *)&ip_hdr->proto;
> +		if (exts_len > 0) {
> +			fp->exts_len = exts_len;
> +			fp->next_proto = last_ipv6_ext;
> +		}
> +	}
> +
>  	ip_frag_inuse(tbl, fp);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> --
> 2.34.1
  

Patch

diff --git a/.mailmap b/.mailmap
index 4a508bafad..69b229a5b7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1548,6 +1548,7 @@  Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
 Victor Kaplansky <victork@redhat.com>
 Victor Raj <victor.raj@intel.com>
 Vidya Sagar Velumuri <vvelumuri@marvell.com>
+Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
 Vignesh Sridhar <vignesh.sridhar@intel.com>
 Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
 Vijaya Mohan Guvva <vijay1054@gmail.com>
diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
index 51fc9d47fb..db2665e846 100644
--- a/lib/ip_frag/ip_frag_common.h
+++ b/lib/ip_frag/ip_frag_common.h
@@ -169,6 +169,8 @@  ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
 	fp->total_size = UINT32_MAX;
 	fp->frag_size = 0;
 	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->exts_len = 0;
+	fp->next_proto = NULL;
 	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
 	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
 }
diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
index 54afed5417..429e74f1b3 100644
--- a/lib/ip_frag/ip_reassembly.h
+++ b/lib/ip_frag/ip_reassembly.h
@@ -54,6 +54,8 @@  struct __rte_cache_aligned ip_frag_pkt {
 	uint32_t total_size;                   /* expected reassembled size */
 	uint32_t frag_size;                    /* size of fragments received */
 	uint32_t last_idx;                     /* index of next entry to fill */
+	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
+	uint8_t *next_proto;                   /* pointer of the next_proto field */
 	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
 };
 
diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
index 88863a98d1..8decf592a6 100644
--- a/lib/ip_frag/rte_ipv6_reassembly.c
+++ b/lib/ip_frag/rte_ipv6_reassembly.c
@@ -91,19 +91,19 @@  ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	/* update ipv6 header for the reassembled datagram */
 	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
 
+	payload_len += fp->exts_len;
 	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
 
 	/*
 	 * remove fragmentation header. note that per RFC2460, we need to update
 	 * the last non-fragmentable header with the "next header" field to contain
-	 * type of the first fragmentable header, but we currently don't support
-	 * other headers, so we assume there are no other headers and thus update
-	 * the main IPv6 header instead.
+	 * type of the first fragmentable header.
 	 */
-	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
-	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
-	ip_hdr->proto = frag_hdr->next_header;
+	frag_hdr = (struct rte_ipv6_fragment_ext *)
+		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
+	*fp->next_proto = frag_hdr->next_header;
 
+	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
 	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
 			rte_pktmbuf_mtod(m, char*), move_len);
 
@@ -112,6 +112,39 @@  ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	return m;
 }
 
+/*
+ * Function to crawl through the extension header stack.
+ * This function breaks as soon as the fragment header is
+ * found and returns the total length of the traversed exts
+ * and the last extension before the fragment header
+ */
+static inline uint32_t
+ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
+{
+	uint32_t total_len = 0;
+	uint8_t num_exts = 0;
+	size_t ext_len = 0;
+	*last_ext = (uint8_t *)(ip_hdr + 1);
+	int next_proto = ip_hdr->proto;
+#define MAX_NUM_IPV6_EXTS 8
+
+	while (next_proto != IPPROTO_FRAGMENT &&
+		num_exts < MAX_NUM_IPV6_EXTS &&
+		(next_proto = rte_ipv6_get_next_ext(
+		*last_ext, next_proto, &ext_len)) >= 0) {
+
+		total_len += ext_len;
+
+		if (next_proto == IPPROTO_FRAGMENT)
+			return total_len;
+
+		*last_ext += ext_len;
+		num_exts++;
+	}
+
+	return total_len;
+}
+
 /*
  * Process new mbuf with fragment of IPV6 datagram.
  * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
@@ -139,6 +172,8 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 {
 	struct ip_frag_pkt *fp;
 	struct ip_frag_key key;
+	uint8_t *last_ipv6_ext;
+	uint32_t exts_len;
 	uint16_t ip_ofs;
 	int32_t ip_len;
 	int32_t trim;
@@ -154,10 +189,10 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/*
 	 * as per RFC2460, payload length contains all extension headers
 	 * as well.
-	 * since we don't support anything but frag headers,
-	 * this is what we remove from the payload len.
+	 * so we remove the extension len from the payload len.
 	 */
-	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
+	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
 	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
@@ -201,6 +236,21 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/* process the fragmented packet. */
 	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
 			MORE_FRAGS(frag_hdr->frag_data));
+
+	/* store extension stack info, only for first fragment */
+	if (ip_ofs == 0) {
+		/*
+		 * fp->next_proto points to either the IP's next header
+		 * or the next header of the extension before the fragment
+		 * extension
+		 */
+		fp->next_proto = (uint8_t *)&ip_hdr->proto;
+		if (exts_len > 0) {
+			fp->exts_len = exts_len;
+			fp->next_proto = last_ipv6_ext;
+		}
+	}
+
 	ip_frag_inuse(tbl, fp);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"