ip_frag: support IPv6 reassembly with extensions

Message ID 20240826112328.3028488-1-vignesh.purushotham.srinivas@ericsson.com (mailing list archive)
State Superseded
Delegated to: Thomas Monjalon
Headers
Series ip_frag: support IPv6 reassembly with extensions |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-compile-arm64-testing pending Testing pending
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-unit-arm64-testing pending Testing pending
ci/github-robot: build success github build: passed
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Vignesh Purushotham Srinivas Aug. 26, 2024, 11:23 a.m. UTC
From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>

Add support to ip_frag library to perform IPv6 reassembly
when extension headers are present before the fragment
extension in the packet.

Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
---
 .mailmap                          |  1 +
 lib/ip_frag/ip_frag_common.h      |  2 +
 lib/ip_frag/ip_reassembly.h       |  2 +
 lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
 4 files changed, 64 insertions(+), 9 deletions(-)
  

Comments

Stephen Hemminger Aug. 26, 2024, 3:41 p.m. UTC | #1
On Mon, 26 Aug 2024 13:23:28 +0200
<vignesh.purushotham.srinivas@ericsson.com> wrote:

> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };

This creates a 32 bit hole in the structure.
Better to put next_proto after the start field.

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {

I would break up this loop condition for clarity.
Something like:

	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
		if (next_proto < 0)
			break

Also, this needs new test cases.
  
Konstantin Ananyev Sept. 17, 2024, 5:57 p.m. UTC | #2
> 
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> >  	uint32_t total_size;                   /* expected reassembled size */
> >  	uint32_t frag_size;                    /* size of fragments received */
> >  	uint32_t last_idx;                     /* index of next entry to fill */
> > +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> > +	uint8_t *next_proto;                   /* pointer of the next_proto field */
> >  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> >  };
> 
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.

Another alternative - use offset within the mbuf instead of pointer.

> 
> > +
> > +	while (next_proto != IPPROTO_FRAGMENT &&
> > +		num_exts < MAX_NUM_IPV6_EXTS &&
> > +		(next_proto = rte_ipv6_get_next_ext(
> > +		*last_ext, next_proto, &ext_len)) >= 0) {
> 
> I would break up this loop condition for clarity.

+ 1

> Something like:
> 
> 	while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
> 		next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
> 		if (next_proto < 0)
> 			break
> 
> Also, need a new test cases for this.

Agree, that would be good thing to add.
  
Konstantin Ananyev Sept. 17, 2024, 6:07 p.m. UTC | #3
> From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> 
> Add support to ip_frag library to perform IPv6 reassembly
> when extension headers are present before the fragment
> extension in the packet.
> 
> Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> ---
>  .mailmap                          |  1 +
>  lib/ip_frag/ip_frag_common.h      |  2 +
>  lib/ip_frag/ip_reassembly.h       |  2 +
>  lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
>  4 files changed, 64 insertions(+), 9 deletions(-)
> 
> diff --git a/.mailmap b/.mailmap
> index 4a508bafad..69b229a5b7 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
>  Victor Kaplansky <victork@redhat.com>
>  Victor Raj <victor.raj@intel.com>
>  Vidya Sagar Velumuri <vvelumuri@marvell.com>
> +Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
>  Vignesh Sridhar <vignesh.sridhar@intel.com>
>  Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
>  Vijaya Mohan Guvva <vijay1054@gmail.com>
> diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
> index 51fc9d47fb..db2665e846 100644
> --- a/lib/ip_frag/ip_frag_common.h
> +++ b/lib/ip_frag/ip_frag_common.h
> @@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
>  	fp->total_size = UINT32_MAX;
>  	fp->frag_size = 0;
>  	fp->last_idx = IP_MIN_FRAG_NUM;
> +	fp->exts_len = 0;
> +	fp->next_proto = NULL;
>  	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
>  	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
>  }
> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
>  	uint32_t total_size;                   /* expected reassembled size */
>  	uint32_t frag_size;                    /* size of fragments received */
>  	uint32_t last_idx;                     /* index of next entry to fill */
> +	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
> +	uint8_t *next_proto;                   /* pointer of the next_proto field */
>  	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
>  };
> 
> diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
> index 88863a98d1..8decf592a6 100644
> --- a/lib/ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/ip_frag/rte_ipv6_reassembly.c
> @@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	/* update ipv6 header for the reassembled datagram */
>  	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
> 
> +	payload_len += fp->exts_len;
>  	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
> 
>  	/*
>  	 * remove fragmentation header. note that per RFC2460, we need to update
>  	 * the last non-fragmentable header with the "next header" field to contain
> -	 * type of the first fragmentable header, but we currently don't support
> -	 * other headers, so we assume there are no other headers and thus update
> -	 * the main IPv6 header instead.
> +	 * type of the first fragmentable header.
>  	 */
> -	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
> -	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
> -	ip_hdr->proto = frag_hdr->next_header;
> +	frag_hdr = (struct rte_ipv6_fragment_ext *)
> +		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
> +	*fp->next_proto = frag_hdr->next_header;
> 
> +	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
>  	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
>  			rte_pktmbuf_mtod(m, char*), move_len);
> 
> @@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  	return m;
>  }
> 
> +/*
> + * Function to crawl through the extension header stack.
> + * This function breaks as soon a the fragment header is
> + * found and returns the total length the traversed exts
> + * and the last extension before the fragment header
> + */
> +static inline uint32_t
> +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> +{
> +	uint32_t total_len = 0;
> +	uint8_t num_exts = 0;
> +	size_t ext_len = 0;
> +	*last_ext = (uint8_t *)(ip_hdr + 1);
> +	int next_proto = ip_hdr->proto;
> +#define MAX_NUM_IPV6_EXTS 8

As a nit - let's keep coding style consistent:
Pls move #define outside the function definition. 

> +
> +	while (next_proto != IPPROTO_FRAGMENT &&
> +		num_exts < MAX_NUM_IPV6_EXTS &&
> +		(next_proto = rte_ipv6_get_next_ext(
> +		*last_ext, next_proto, &ext_len)) >= 0) {
> +
> +		total_len += ext_len;
> +
> +		if (next_proto == IPPROTO_FRAGMENT)
> +			return total_len;
> +
> +		*last_ext += ext_len;
> +		num_exts++;
> +	}

So if IPPROTO_FRAGMENT was not found, we just use extension #8 instead?
Shouldn't we return an error in that case, and probably drop the fragment?

> +	return total_len;
> +}
> +
>  /*
>   * Process new mbuf with fragment of IPV6 datagram.
>   * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
> @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  {
>  	struct ip_frag_pkt *fp;
>  	struct ip_frag_key key;
> +	uint8_t *last_ipv6_ext;
> +	uint32_t exts_len;
>  	uint16_t ip_ofs;
>  	int32_t ip_len;
>  	int32_t trim;
> @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/*
>  	 * as per RFC2460, payload length contains all extension headers
>  	 * as well.
> -	 * since we don't support anything but frag headers,
> -	 * this is what we remove from the payload len.
> +	 * so we remove the extension len from the payload len.
>  	 */
> -	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
> +	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> +	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);

Hmm..., as I remember, ip_len is what we want to preserve in the packet...
Why do we want to remove all previous ext headers here?

>  	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  	/* process the fragmented packet. */
>  	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
>  			MORE_FRAGS(frag_hdr->frag_data));

Can you explain why we are setting these new fp fields after 'ip_frag_process()'?
ip_frag_process() itself can call reassembly() - if all fragments are already in place.

> +
> +	/* store extension stack info, only for first fragment */
> +	if (ip_ofs == 0) {

If we want it for first fragment only, why not invoke ip_frag_get_last_exthdr()
only when ip_ofs == 0?
 
> +		/*
> +		 * fp->next_proto points to either the IP's next header
> +		 * or th next header of the extension before the fragment
> +		 * extension
> +		 */
> +		fp->next_proto = (uint8_t *)&ip_hdr->proto;
> +		if (exts_len > 0) {
> +			fp->exts_len = exts_len;
> +			fp->next_proto = last_ipv6_ext;
> +		}
> +	}
> +
>  	ip_frag_inuse(tbl, fp);
> 
>  	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> --
> 2.34.1
  
Vignesh Purushotham Srinivas Oct. 14, 2024, 10:38 a.m. UTC | #4
-----Original Message-----
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: Stephen Hemminger <stephen@networkplumber.org>,
vignesh.purushotham.srinivas@ericsson.com
<vignesh.purushotham.srinivas@ericsson.com>
Cc: konstantin.v.ananyev@yandex.ru <konstantin.v.ananyev@yandex.ru>,
dev@dpdk.org <dev@dpdk.org>
Subject: RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
Date: Tue, 17 Sep 2024 17:57:59 +0000

[You don't often get email from konstantin.ananyev@huawei.com. Learn
why this is important at
https://aka.ms/LearnAboutSenderIdentification ]

> 
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
> 
> > diff --git a/lib/ip_frag/ip_reassembly.h
> > b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> >     uint32_t total_size;                   /* expected reassembled
> > size */
> >     uint32_t frag_size;                    /* size of fragments
> > received */
> >     uint32_t last_idx;                     /* index of next entry
> > to fill */
> > +   uint32_t exts_len;                     /* length of extension
> > hdrs for first fragment */
> > +   uint8_t *next_proto;                   /* pointer of the
> > next_proto field */
> >     struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> >  };
> 
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.

Another alternative - use offset within the mbuf instead of pointer.

ACK

> 
> > +
> > +   while (next_proto != IPPROTO_FRAGMENT &&
> > +           num_exts < MAX_NUM_IPV6_EXTS &&
> > +           (next_proto = rte_ipv6_get_next_ext(
> > +           *last_ext, next_proto, &ext_len)) >= 0) {
> 
> I would break up this loop condition for clarity.

+ 1

ACK

> Something like:
> 
>       while (next_proto != IPPROTO_FRAGMENT && num_exts <
> MAX_NUM_IPV6_EXTS) {
>               next_proto = rte_ipv6_get_next_ext(*last_ext,
> next_proto, &ext_len);
>               if (next_proto < 0)
>                       break
> 
> Also, need a new test cases for this.

Agree, that would be good thing to add.

ACK
  
Vignesh Purushotham Srinivas Oct. 14, 2024, 4:11 p.m. UTC | #5
-----Original Message-----
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: vignesh.purushotham.srinivas@ericsson.com
<vignesh.purushotham.srinivas@ericsson.com>,
konstantin.v.ananyev@yandex.ru <konstantin.v.ananyev@yandex.ru>
Cc: dev@dpdk.org <dev@dpdk.org>
Subject: RE: [PATCH] ip_frag: support IPv6 reassembly with extensions
Date: Tue, 17 Sep 2024 18:07:25 +0000

> > +/*
> > + * Function to crawl through the extension header stack.
> > + * This function breaks as soon a the fragment header is
> > + * found and returns the total length the traversed exts
> > + * and the last extension before the fragment header
> > + */
> > +static inline uint32_t
> > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t
**last_ext)
> > +{
> > +     uint32_t total_len = 0;
> > +     uint8_t num_exts = 0;
> > +     size_t ext_len = 0;
> > +     *last_ext = (uint8_t *)(ip_hdr + 1);
> > +     int next_proto = ip_hdr->proto;
> > +#define MAX_NUM_IPV6_EXTS 8
> 
> As a nit - let's keep coding style consistent:
> Pls move #define outside the function definition.

ACK

> > +
> > +     while (next_proto != IPPROTO_FRAGMENT &&
> > +             num_exts < MAX_NUM_IPV6_EXTS &&
> > +             (next_proto = rte_ipv6_get_next_ext(
> > +             *last_ext, next_proto, &ext_len)) >= 0) {
> > +
> > +             total_len += ext_len;
> > +
> > +             if (next_proto == IPPROTO_FRAGMENT)
> > +                     return total_len;
> > +
> > +             *last_ext += ext_len;
> > +             num_exts++;
> > +     }
> 
> So if  IPPROTO_FRAGMENT was not found, we just use extension #8
instead?
> Shouldn't we return an error in that case,  and probably drop the
fragment?

Hmmm, looks like a bug. Will fix this in the next version

> > +     return total_len;
> > +}
> > +
> >  /*
> >   * Process new mbuf with fragment of IPV6 datagram.
> >   * Incoming mbuf should have its l2_len/l3_len fields setup
correctly.
> > @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >  {
> >       struct ip_frag_pkt *fp;
> >       struct ip_frag_key key;
> > +     uint8_t *last_ipv6_ext;
> > +     uint32_t exts_len;
> >       uint16_t ip_ofs;
> >       int32_t ip_len;
> >       int32_t trim;
> > @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >       /*
> >        * as per RFC2460, payload length contains all extension
headers
> >        * as well.
> > -      * since we don't support anything but frag headers,
> > -      * this is what we remove from the payload len.
> > +      * so we remove the extension len from the payload len.
> >        */
> > -     ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) -
sizeof(*frag_hdr);
> > +     exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> > +     ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len -
sizeof(*frag_hdr);
> 
> Hmm..., as I remember ip_len is what we want to preserve in the
packet...
> Why we want to remove all previous ext headers here?

The ip_len for the packet is computed again later, after reassembly.
Here, however, we compute ip_len to perform some checks on the packet,
and this math follows what is mentioned in the RFC.

> >       trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
> >
> >       IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> > @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct
rte_ip_frag_tbl *tbl,
> >       /* process the fragmented packet. */
> >       mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
> >                       MORE_FRAGS(frag_hdr->frag_data));
> 
> Can you explain why we setting these new fp fields after
'ip_frag_process()'?
> Ip_frag_process() itself can call reassembly() - if all fragments are
already in place.

This is a bug; I will fix it in the next version.

> > +
> > +     /* store extension stack info, only for first fragment */
> > +     if (ip_ofs == 0) {
> 
> If we want it for first fragment only, why not invoke
ip_frag_get_last_exthdr()
> only when ip_ofs == 0?

No, ip_frag_get_last_exthdr() is called for all fragments to find the
length of the extensions (if present) in order to perform length checks,
but we store this information only for the first fragment. After
reassembly, this stored information is used to restore the extensions
from the first fragment.

> > +             /*
> > +              * fp->next_proto points to either the IP's next
header
> > +              * or th next header of the extension before the
fragment
> > +              * extension
> > +              */
> > +             fp->next_proto = (uint8_t *)&ip_hdr->proto;
> > +             if (exts_len > 0) {
> > +                     fp->exts_len = exts_len;
> > +                     fp->next_proto = last_ipv6_ext;
> > +             }
> > +     }
> > +
> >       ip_frag_inuse(tbl, fp);
> >
> >       IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> > --
> > 2.34.1
  

Patch

diff --git a/.mailmap b/.mailmap
index 4a508bafad..69b229a5b7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1548,6 +1548,7 @@  Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
 Victor Kaplansky <victork@redhat.com>
 Victor Raj <victor.raj@intel.com>
 Vidya Sagar Velumuri <vvelumuri@marvell.com>
+Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
 Vignesh Sridhar <vignesh.sridhar@intel.com>
 Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
 Vijaya Mohan Guvva <vijay1054@gmail.com>
diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
index 51fc9d47fb..db2665e846 100644
--- a/lib/ip_frag/ip_frag_common.h
+++ b/lib/ip_frag/ip_frag_common.h
@@ -169,6 +169,8 @@  ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
 	fp->total_size = UINT32_MAX;
 	fp->frag_size = 0;
 	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->exts_len = 0;
+	fp->next_proto = NULL;
 	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
 	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
 }
diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
index 54afed5417..429e74f1b3 100644
--- a/lib/ip_frag/ip_reassembly.h
+++ b/lib/ip_frag/ip_reassembly.h
@@ -54,6 +54,8 @@  struct __rte_cache_aligned ip_frag_pkt {
 	uint32_t total_size;                   /* expected reassembled size */
 	uint32_t frag_size;                    /* size of fragments received */
 	uint32_t last_idx;                     /* index of next entry to fill */
+	uint32_t exts_len;                     /* length of extension hdrs for first fragment */
+	uint8_t *next_proto;                   /* pointer of the next_proto field */
 	struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
 };
 
diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
index 88863a98d1..8decf592a6 100644
--- a/lib/ip_frag/rte_ipv6_reassembly.c
+++ b/lib/ip_frag/rte_ipv6_reassembly.c
@@ -91,19 +91,19 @@  ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	/* update ipv6 header for the reassembled datagram */
 	ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
 
+	payload_len += fp->exts_len;
 	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
 
 	/*
 	 * remove fragmentation header. note that per RFC2460, we need to update
 	 * the last non-fragmentable header with the "next header" field to contain
-	 * type of the first fragmentable header, but we currently don't support
-	 * other headers, so we assume there are no other headers and thus update
-	 * the main IPv6 header instead.
+	 * type of the first fragmentable header.
 	 */
-	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
-	frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
-	ip_hdr->proto = frag_hdr->next_header;
+	frag_hdr = (struct rte_ipv6_fragment_ext *)
+		((uint8_t *) (ip_hdr + 1) + fp->exts_len);
+	*fp->next_proto = frag_hdr->next_header;
 
+	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
 	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
 			rte_pktmbuf_mtod(m, char*), move_len);
 
@@ -112,6 +112,39 @@  ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 	return m;
 }
 
+/*
+ * Walk the extension headers after ip_hdr until the fragment header is
+ * reached, MAX_NUM_IPV6_EXTS were parsed, or rte_ipv6_get_next_ext() returns < 0.
+ * Returns the summed length of the parsed headers; if the fragment header
+ * was reached, *last_ext points at the extension preceding it, if any.
+ */
+static inline uint32_t
+ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
+{
+	uint32_t total_len = 0;
+	uint8_t num_exts = 0;
+	size_t ext_len = 0;
+	*last_ext = (uint8_t *)(ip_hdr + 1);
+	int next_proto = ip_hdr->proto;
+#define MAX_NUM_IPV6_EXTS 8
+
+	while (next_proto != IPPROTO_FRAGMENT &&
+		num_exts < MAX_NUM_IPV6_EXTS &&
+		(next_proto = rte_ipv6_get_next_ext(
+		*last_ext, next_proto, &ext_len)) >= 0) {
+
+		total_len += ext_len;
+
+		if (next_proto == IPPROTO_FRAGMENT)
+			return total_len;
+
+		*last_ext += ext_len;
+		num_exts++;
+	}
+
+	return total_len;
+}
+
 /*
  * Process new mbuf with fragment of IPV6 datagram.
  * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
@@ -139,6 +172,8 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 {
 	struct ip_frag_pkt *fp;
 	struct ip_frag_key key;
+	uint8_t *last_ipv6_ext;
+	uint32_t exts_len;
 	uint16_t ip_ofs;
 	int32_t ip_len;
 	int32_t trim;
@@ -154,10 +189,10 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/*
 	 * as per RFC2460, payload length contains all extension headers
 	 * as well.
-	 * since we don't support anything but frag headers,
-	 * this is what we remove from the payload len.
+	 * so we remove the extension len from the payload len.
 	 */
-	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+	exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
+	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
 	trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
@@ -201,6 +236,21 @@  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 	/* process the fragmented packet. */
 	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
 			MORE_FRAGS(frag_hdr->frag_data));
+
+	/* store extension stack info, only for first fragment */
+	if (ip_ofs == 0) {
+		/*
+		 * fp->next_proto points to either the IP's next header
+		 * or the next header of the extension before the fragment
+		 * extension
+		 */
+		fp->next_proto = (uint8_t *)&ip_hdr->proto;
+		if (exts_len > 0) {
+			fp->exts_len = exts_len;
+			fp->next_proto = last_ipv6_ext;
+		}
+	}
+
 	ip_frag_inuse(tbl, fp);
 
 	IP_FRAG_LOG(DEBUG, "%s:%d:\n"