ip_frag: support IPv6 reassembly with extensions
Checks
Commit Message
From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
Add support to ip_frag library to perform IPv6 reassembly
when extension headers are present before the fragment
extension in the packet.
Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
---
.mailmap | 1 +
lib/ip_frag/ip_frag_common.h | 2 +
lib/ip_frag/ip_reassembly.h | 2 +
lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
4 files changed, 64 insertions(+), 9 deletions(-)
Comments
On Mon, 26 Aug 2024 13:23:28 +0200
<vignesh.purushotham.srinivas@ericsson.com> wrote:
> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> uint32_t total_size; /* expected reassembled size */
> uint32_t frag_size; /* size of fragments received */
> uint32_t last_idx; /* index of next entry to fill */
> + uint32_t exts_len; /* length of extension hdrs for first fragment */
> + uint8_t *next_proto; /* pointer of the next_proto field */
> struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> };
This creates a 32 bit hole in the structure.
Better to put next_proto after the start field.
> +
> + while (next_proto != IPPROTO_FRAGMENT &&
> + num_exts < MAX_NUM_IPV6_EXTS &&
> + (next_proto = rte_ipv6_get_next_ext(
> + *last_ext, next_proto, &ext_len)) >= 0) {
I would break up this loop condition for clarity.
Something like:
while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
if (next_proto < 0)
break
Also, need a new test cases for this.
>
> On Mon, 26 Aug 2024 13:23:28 +0200
> <vignesh.purushotham.srinivas@ericsson.com> wrote:
>
> > diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> > index 54afed5417..429e74f1b3 100644
> > --- a/lib/ip_frag/ip_reassembly.h
> > +++ b/lib/ip_frag/ip_reassembly.h
> > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> > uint32_t total_size; /* expected reassembled size */
> > uint32_t frag_size; /* size of fragments received */
> > uint32_t last_idx; /* index of next entry to fill */
> > + uint32_t exts_len; /* length of extension hdrs for first fragment */
> > + uint8_t *next_proto; /* pointer of the next_proto field */
> > struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> > };
>
> This creates a 32 bit hole in the structure.
> Better to put next_proto after the start field.
Another alternative: use an offset within the mbuf instead of a pointer.
>
> > +
> > + while (next_proto != IPPROTO_FRAGMENT &&
> > + num_exts < MAX_NUM_IPV6_EXTS &&
> > + (next_proto = rte_ipv6_get_next_ext(
> > + *last_ext, next_proto, &ext_len)) >= 0) {
>
> I would break up this loop condition for clarity.
+ 1
> Something like:
>
> while (next_proto != IPPROTO_FRAGMENT && num_exts < MAX_NUM_IPV6_EXTS) {
> next_proto = rte_ipv6_get_next_ext(*last_ext, next_proto, &ext_len);
> if (next_proto < 0)
> break
>
> Also, need a new test cases for this.
Agreed, that would be a good thing to add.
> From: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
>
> Add support to ip_frag library to perform IPv6 reassembly
> when extension headers are present before the fragment
> extension in the packet.
>
> Signed-off-by: Vignesh PS <vignesh.purushotham.srinivas@ericsson.com>
> ---
> .mailmap | 1 +
> lib/ip_frag/ip_frag_common.h | 2 +
> lib/ip_frag/ip_reassembly.h | 2 +
> lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++----
> 4 files changed, 64 insertions(+), 9 deletions(-)
>
> diff --git a/.mailmap b/.mailmap
> index 4a508bafad..69b229a5b7 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
> Victor Kaplansky <victork@redhat.com>
> Victor Raj <victor.raj@intel.com>
> Vidya Sagar Velumuri <vvelumuri@marvell.com>
> +Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
> Vignesh Sridhar <vignesh.sridhar@intel.com>
> Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
> Vijaya Mohan Guvva <vijay1054@gmail.com>
> diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h
> index 51fc9d47fb..db2665e846 100644
> --- a/lib/ip_frag/ip_frag_common.h
> +++ b/lib/ip_frag/ip_frag_common.h
> @@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
> fp->total_size = UINT32_MAX;
> fp->frag_size = 0;
> fp->last_idx = IP_MIN_FRAG_NUM;
> + fp->exts_len = 0;
> + fp->next_proto = NULL;
> fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
> fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
> }
> diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h
> index 54afed5417..429e74f1b3 100644
> --- a/lib/ip_frag/ip_reassembly.h
> +++ b/lib/ip_frag/ip_reassembly.h
> @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
> uint32_t total_size; /* expected reassembled size */
> uint32_t frag_size; /* size of fragments received */
> uint32_t last_idx; /* index of next entry to fill */
> + uint32_t exts_len; /* length of extension hdrs for first fragment */
> + uint8_t *next_proto; /* pointer of the next_proto field */
> struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
> };
>
> diff --git a/lib/ip_frag/rte_ipv6_reassembly.c b/lib/ip_frag/rte_ipv6_reassembly.c
> index 88863a98d1..8decf592a6 100644
> --- a/lib/ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/ip_frag/rte_ipv6_reassembly.c
> @@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
> /* update ipv6 header for the reassembled datagram */
> ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
>
> + payload_len += fp->exts_len;
> ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
>
> /*
> * remove fragmentation header. note that per RFC2460, we need to update
> * the last non-fragmentable header with the "next header" field to contain
> - * type of the first fragmentable header, but we currently don't support
> - * other headers, so we assume there are no other headers and thus update
> - * the main IPv6 header instead.
> + * type of the first fragmentable header.
> */
> - move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
> - frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
> - ip_hdr->proto = frag_hdr->next_header;
> + frag_hdr = (struct rte_ipv6_fragment_ext *)
> + ((uint8_t *) (ip_hdr + 1) + fp->exts_len);
> + *fp->next_proto = frag_hdr->next_header;
>
> + move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
> ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
> rte_pktmbuf_mtod(m, char*), move_len);
>
> @@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
> return m;
> }
>
> +/*
> + * Function to crawl through the extension header stack.
> + * This function breaks as soon as the fragment header is
> + * found and returns the total length of the traversed exts
> + * and the last extension before the fragment header
> + */
> +static inline uint32_t
> +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
> +{
> + uint32_t total_len = 0;
> + uint8_t num_exts = 0;
> + size_t ext_len = 0;
> + *last_ext = (uint8_t *)(ip_hdr + 1);
> + int next_proto = ip_hdr->proto;
> +#define MAX_NUM_IPV6_EXTS 8
As a nit - let's keep the coding style consistent:
Please move the #define outside the function definition.
> +
> + while (next_proto != IPPROTO_FRAGMENT &&
> + num_exts < MAX_NUM_IPV6_EXTS &&
> + (next_proto = rte_ipv6_get_next_ext(
> + *last_ext, next_proto, &ext_len)) >= 0) {
> +
> + total_len += ext_len;
> +
> + if (next_proto == IPPROTO_FRAGMENT)
> + return total_len;
> +
> + *last_ext += ext_len;
> + num_exts++;
> + }
So if IPPROTO_FRAGMENT was not found, we just use extension #8 instead?
Shouldn't we return an error in that case, and probably drop the fragment?
> + return total_len;
> +}
> +
> /*
> * Process new mbuf with fragment of IPV6 datagram.
> * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
> @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> {
> struct ip_frag_pkt *fp;
> struct ip_frag_key key;
> + uint8_t *last_ipv6_ext;
> + uint32_t exts_len;
> uint16_t ip_ofs;
> int32_t ip_len;
> int32_t trim;
> @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> /*
> * as per RFC2460, payload length contains all extension headers
> * as well.
> - * since we don't support anything but frag headers,
> - * this is what we remove from the payload len.
> + * so we remove the extension len from the payload len.
> */
> - ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
> + exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
> + ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
Hmm... as I remember, ip_len is what we want to preserve in the packet.
Why do we want to remove all previous ext headers here?
> trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
>
> IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> /* process the fragmented packet. */
> mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
> MORE_FRAGS(frag_hdr->frag_data));
Can you explain why we are setting these new fp fields after 'ip_frag_process()'?
ip_frag_process() itself can call reassembly() if all fragments are already in place.
> +
> + /* store extension stack info, only for first fragment */
> + if (ip_ofs == 0) {
If we want it for the first fragment only, why not invoke ip_frag_get_last_exthdr()
only when ip_ofs == 0?
> + /*
> + * fp->next_proto points to either the IP's next header
> + * or the next header of the extension before the fragment
> + * extension
> + */
> + fp->next_proto = (uint8_t *)&ip_hdr->proto;
> + if (exts_len > 0) {
> + fp->exts_len = exts_len;
> + fp->next_proto = last_ipv6_ext;
> + }
> + }
> +
> ip_frag_inuse(tbl, fp);
>
> IP_FRAG_LOG(DEBUG, "%s:%d:\n"
> --
> 2.34.1
@@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viacheslavo@nvidia.com> <viacheslavo@mellanox.com>
Victor Kaplansky <victork@redhat.com>
Victor Raj <victor.raj@intel.com>
Vidya Sagar Velumuri <vvelumuri@marvell.com>
+Vignesh PS <vignesh.purushotham.srinivas@ericsson.com> <vig.vigneshps1995@gmail.com>
Vignesh Sridhar <vignesh.sridhar@intel.com>
Vijayakumar Muthuvel Manickam <mmvijay@gmail.com>
Vijaya Mohan Guvva <vijay1054@gmail.com>
@@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
fp->total_size = UINT32_MAX;
fp->frag_size = 0;
fp->last_idx = IP_MIN_FRAG_NUM;
+ fp->exts_len = 0;
+ fp->next_proto = NULL;
fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
}
@@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt {
uint32_t total_size; /* expected reassembled size */
uint32_t frag_size; /* size of fragments received */
uint32_t last_idx; /* index of next entry to fill */
+ uint32_t exts_len; /* length of extension hdrs for first fragment */
+ uint8_t *next_proto; /* pointer of the next_proto field */
struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
};
@@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
/* update ipv6 header for the reassembled datagram */
ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len);
+ payload_len += fp->exts_len;
ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
/*
* remove fragmentation header. note that per RFC2460, we need to update
* the last non-fragmentable header with the "next header" field to contain
- * type of the first fragmentable header, but we currently don't support
- * other headers, so we assume there are no other headers and thus update
- * the main IPv6 header instead.
+ * type of the first fragmentable header.
*/
- move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
- frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1);
- ip_hdr->proto = frag_hdr->next_header;
+ frag_hdr = (struct rte_ipv6_fragment_ext *)
+ ((uint8_t *) (ip_hdr + 1) + fp->exts_len);
+ *fp->next_proto = frag_hdr->next_header;
+ move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
rte_pktmbuf_mtod(m, char*), move_len);
@@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
return m;
}
+/*
+ * Function to crawl through the extension header stack.
+ * This function breaks as soon as the fragment header is
+ * found and returns the total length of the traversed exts
+ * and the last extension before the fragment header
+ */
+static inline uint32_t
+ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext)
+{
+ uint32_t total_len = 0;
+ uint8_t num_exts = 0;
+ size_t ext_len = 0;
+ *last_ext = (uint8_t *)(ip_hdr + 1);
+ int next_proto = ip_hdr->proto;
+#define MAX_NUM_IPV6_EXTS 8
+
+ while (next_proto != IPPROTO_FRAGMENT &&
+ num_exts < MAX_NUM_IPV6_EXTS &&
+ (next_proto = rte_ipv6_get_next_ext(
+ *last_ext, next_proto, &ext_len)) >= 0) {
+
+ total_len += ext_len;
+
+ if (next_proto == IPPROTO_FRAGMENT)
+ return total_len;
+
+ *last_ext += ext_len;
+ num_exts++;
+ }
+
+ return total_len;
+}
+
/*
* Process new mbuf with fragment of IPV6 datagram.
* Incoming mbuf should have its l2_len/l3_len fields setup correctly.
@@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
{
struct ip_frag_pkt *fp;
struct ip_frag_key key;
+ uint8_t *last_ipv6_ext;
+ uint32_t exts_len;
uint16_t ip_ofs;
int32_t ip_len;
int32_t trim;
@@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
/*
* as per RFC2460, payload length contains all extension headers
* as well.
- * since we don't support anything but frag headers,
- * this is what we remove from the payload len.
+ * so we remove the extension len from the payload len.
*/
- ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+ exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext);
+ ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - sizeof(*frag_hdr);
trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
@@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
/* process the fragmented packet. */
mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
MORE_FRAGS(frag_hdr->frag_data));
+
+ /* store extension stack info, only for first fragment */
+ if (ip_ofs == 0) {
+ /*
+ * fp->next_proto points to either the IP's next header
+ * or the next header of the extension before the fragment
+ * extension
+ */
+ fp->next_proto = (uint8_t *)&ip_hdr->proto;
+ if (exts_len > 0) {
+ fp->exts_len = exts_len;
+ fp->next_proto = last_ipv6_ext;
+ }
+ }
+
ip_frag_inuse(tbl, fp);
IP_FRAG_LOG(DEBUG, "%s:%d:\n"