[v5,3/3] ip_frag: extend IPv6 fragment header retrieval

Message ID 20181031021758.66752-4-doucette@bu.edu
State New
Delegated to: Thomas Monjalon
Headers show
Series
  • Extend rte_ipv6_frag_get_ipv6_fragment_header()
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Cody Doucette Oct. 31, 2018, 2:17 a.m.
Add the ability to parse IPv6 extenders to find the
IPv6 fragment header, and update callers.

According to RFC 8200, there is no guarantee that the IPv6
Fragment extension header will come before any other extension
header, even though it is recommended.

Signed-off-by: Cody Doucette <doucette@bu.edu>
Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---
 examples/ip_reassembly/main.c               |  6 ++--
 lib/librte_ip_frag/rte_ip_frag.h            | 23 ++++++-------
 lib/librte_ip_frag/rte_ip_frag_version.map  |  1 +
 lib/librte_ip_frag/rte_ipv6_fragmentation.c | 38 +++++++++++++++++++++
 lib/librte_ip_frag/rte_ipv6_reassembly.c    |  4 +--
 lib/librte_port/rte_port_ras.c              |  6 ++--
 6 files changed, 59 insertions(+), 19 deletions(-)

Comments

Thomas Monjalon Oct. 31, 2018, 5:03 p.m. | #1
31/10/2018 03:17, Cody Doucette:
> Add the ability to parse IPv6 extenders to find the
> IPv6 fragment header, and update callers.
> 
> According to RFC 8200, there is no guarantee that the IPv6
> Fragment extension header will come before any other extension
> header, even though it is recommended.
> 
> Signed-off-by: Cody Doucette <doucette@bu.edu>
> Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
> Reviewed-by: Michel Machado <michel@digirati.com.br>

Konstantin, did you already ack this patch?
Ananyev, Konstantin Oct. 31, 2018, 7:56 p.m. | #2
Hi Cody,

> 
> Add the ability to parse IPv6 extenders to find the
> IPv6 fragment header, and update callers.
> 
> According to RFC 8200, there is no guarantee that the IPv6
> Fragment extension header will come before any other extension
> header, even though it is recommended.
> 
> Signed-off-by: Cody Doucette <doucette@bu.edu>
> Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
> Reviewed-by: Michel Machado <michel@digirati.com.br>
> ---
>  examples/ip_reassembly/main.c               |  6 ++--
>  lib/librte_ip_frag/rte_ip_frag.h            | 23 ++++++-------
>  lib/librte_ip_frag/rte_ip_frag_version.map  |  1 +
>  lib/librte_ip_frag/rte_ipv6_fragmentation.c | 38 +++++++++++++++++++++
>  lib/librte_ip_frag/rte_ipv6_reassembly.c    |  4 +--
>  lib/librte_port/rte_port_ras.c              |  6 ++--
>  6 files changed, 59 insertions(+), 19 deletions(-)
> 
> diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
> index 17b55d4c7..3a827bd6c 100644
> --- a/examples/ip_reassembly/main.c
> +++ b/examples/ip_reassembly/main.c
> @@ -365,12 +365,14 @@ reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
>  		eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
>  	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
>  		/* if packet is IPv6 */
> -		struct ipv6_extension_fragment *frag_hdr;
> +		const struct ipv6_extension_fragment *frag_hdr;
> +		struct ipv6_extension_fragment frag_hdr_buf;
>  		struct ipv6_hdr *ip_hdr;
> 
>  		ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
> 
> -		frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
> +		frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(m,
> +			ip_hdr, &frag_hdr_buf);

I looked at the patch once again, and it seems incomplete to me.
Sorry for late comments.
Yes, right now te_ipv6_frag_get_ipv6_fragment_header can properly
retrieve ipv6 fragment info, but it is not enough to make things work
for situation when we have packet with frag header not the first and only
extension header.
In the same function, few lines below, we setup l3_len based on that assumption:

m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);

mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);

And inside rte_ipv6_frag_reassemble_packet() we still assume the same:
...
frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1);
ip_hdr->proto = frag_hdr->next_header;

I think we need a function that would allow us to get offset of frag_hdr. 
Actually probably we can have a generic one here, that can return offset for
any requested ext header or total length of ipv6 header.
Something like that:

struct rte_ipv6_get_xhdr_ofs {
   uint16_t find_proto;    /* header proto to find */
   uint16_t  next_proto;  /* next header proto */
   uint32_t next_ofs;       /* offset to start search */
};

struct int 
rte_ipv6_get_xhdr_ofs(struct rte_mbuf *pkt, rte_ipv6_get_xhdr_ofs *find); 

that would go through ipv6 ext headers till either requested proto is found, or end of IPv6 header. 
Then user can do something like that:

/* find fragment extention */
ipv6_get_xhdr_ofs ofs = {
	.find_proto = IPPROTO_FRAGMENT,
	.next_proto = ipv6_hdr->proto,
	.ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);
if(rc == 0) 
     frag_hdr = rte_pktmbuf_mtod_offset(m, .., ofs.ofs);
...     

/* get size of IPv6 header plus all known extensions */
ipv6_get_xhdr_ofs ofs = {
	.find_proto = IPPROTO_MAX,
	.next_proto = ipv6_hdr->proto,
	.ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);     


> 
>  		if (frag_hdr != NULL) {
>  			struct rte_mbuf *mo;
> diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
> index 7f425f610..6fc8106bc 100644
> --- a/lib/librte_ip_frag/rte_ip_frag.h
> +++ b/lib/librte_ip_frag/rte_ip_frag.h
> @@ -211,28 +211,25 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
>  struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>  		struct rte_ip_frag_death_row *dr,
>  		struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
> -		struct ipv6_extension_fragment *frag_hdr);
> +		const struct ipv6_extension_fragment *frag_hdr);
> 
>  /**
>   * Return a pointer to the packet's fragment header, if found.
> - * It only looks at the extension header that's right after the fixed IPv6
> - * header, and doesn't follow the whole chain of extension headers.
>   *
> - * @param hdr
> + * @param pkt
> + *   Pointer to the mbuf of the packet.
> + * @param ip_hdr
>   *   Pointer to the IPv6 header.
> + * @param frag_hdr
> + *   A pointer to the buffer where the fragment header
> + *   will be copied if it is not contiguous in mbuf data.
>   * @return
>   *   Pointer to the IPv6 fragment extension header, or NULL if it's not
>   *   present.
>   */
> -static inline struct ipv6_extension_fragment *
> -rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
> -{
> -	if (hdr->proto == IPPROTO_FRAGMENT) {
> -		return (struct ipv6_extension_fragment *) ++hdr;
> -	}
> -	else
> -		return NULL;
> -}
> +const struct ipv6_extension_fragment *rte_ipv6_frag_get_ipv6_fragment_header(
> +		struct rte_mbuf *pkt, const struct ipv6_hdr *ip_hdr,
> +		struct ipv6_extension_fragment *frag_hdr);

Another thing - wouldn't it be ab API/ABI breakage?
One more question - making it non-inline - how much it would  affect performance?
My guess - no big difference, but did you check?
Konstantin

> 
>  /**
>   * IPv4 fragmentation.
> diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map
> index d40d5515f..8b4c82d08 100644
> --- a/lib/librte_ip_frag/rte_ip_frag_version.map
> +++ b/lib/librte_ip_frag/rte_ip_frag_version.map
> @@ -8,6 +8,7 @@ DPDK_2.0 {
>  	rte_ipv4_fragment_packet;
>  	rte_ipv6_frag_reassemble_packet;
>  	rte_ipv6_fragment_packet;
> +	rte_ipv6_frag_get_ipv6_fragment_header;
> 
>  	local: *;
>  };
> diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> index 62a7e4e83..bd847dd3d 100644
> --- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> @@ -176,3 +176,41 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
> 
>  	return out_pkt_pos;
>  }
> +
> +const struct ipv6_extension_fragment *
> +rte_ipv6_frag_get_ipv6_fragment_header(struct rte_mbuf *pkt,
> +	const struct ipv6_hdr *ip_hdr,
> +	struct ipv6_extension_fragment *frag_hdr)
> +{
> +	size_t offset = sizeof(struct ipv6_hdr);
> +	uint8_t nexthdr = ip_hdr->proto;
> +
> +	while (ipv6_ext_hdr(nexthdr)) {
> +		struct ipv6_opt_hdr opt;
> +		const struct ipv6_opt_hdr *popt = rte_pktmbuf_read(pkt,
> +			offset, sizeof(opt), &opt);
> +		if (popt == NULL)
> +			return NULL;
> +
> +		switch (nexthdr) {
> +		case IPPROTO_NONE:
> +			return NULL;
> +
> +		case IPPROTO_FRAGMENT:
> +			return rte_pktmbuf_read(pkt, offset,
> +				sizeof(*frag_hdr), frag_hdr);
> +
> +		case IPPROTO_AH:
> +			offset += (popt->hdrlen + 2) << 2;
> +			break;
> +
> +		default:
> +			offset += (popt->hdrlen + 1) << 3;
> +			break;
> +		}
> +
> +		nexthdr = popt->nexthdr;
> +	}
> +
> +	return NULL;
> +}
> diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> index db249fe60..b2d67a3f0 100644
> --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> @@ -135,8 +135,8 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
>  struct rte_mbuf *
>  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> -		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> -		struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
> +	struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> +	struct ipv6_hdr *ip_hdr, const struct ipv6_extension_fragment *frag_hdr)
>  {
>  	struct ip_frag_pkt *fp;
>  	struct ip_frag_key key;
> diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
> index c8b2e19bf..28764f744 100644
> --- a/lib/librte_port/rte_port_ras.c
> +++ b/lib/librte_port/rte_port_ras.c
> @@ -184,9 +184,11 @@ process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
>  	/* Assume there is no ethernet header */
>  	struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
> 
> -	struct ipv6_extension_fragment *frag_hdr;
> +	const struct ipv6_extension_fragment *frag_hdr;
> +	struct ipv6_extension_fragment frag_hdr_buf;
>  	uint16_t frag_data = 0;
> -	frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
> +	frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt, pkt_hdr,
> +		&frag_hdr_buf);
>  	if (frag_hdr != NULL)
>  		frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);
> 
> --
> 2.17.1
Cody Doucette Nov. 7, 2018, 8:21 p.m. | #3
Hey Konstantin,

Thanks for reviewing -- I see your point about this patch only removing one
of the places where the code makes assumptions about the position of the
fragmentation header.

Unfortunately at the moment I don't have the resources to dedicate to
writing the complete solution and doing a performance check, so I think I
should withdraw the patch. I hope it at least serves as a blueprint in case
someone comes back to it.

I might be able to come back to it eventually, but likely not soon.

Thanks,
Cody

On Wed, Oct 31, 2018 at 3:57 PM Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:

> Hi Cody,
>
> >
> > Add the ability to parse IPv6 extenders to find the
> > IPv6 fragment header, and update callers.
> >
> > According to RFC 8200, there is no guarantee that the IPv6
> > Fragment extension header will come before any other extension
> > header, even though it is recommended.
> >
> > Signed-off-by: Cody Doucette <doucette@bu.edu>
> > Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
> > Reviewed-by: Michel Machado <michel@digirati.com.br>
> > ---
> >  examples/ip_reassembly/main.c               |  6 ++--
> >  lib/librte_ip_frag/rte_ip_frag.h            | 23 ++++++-------
> >  lib/librte_ip_frag/rte_ip_frag_version.map  |  1 +
> >  lib/librte_ip_frag/rte_ipv6_fragmentation.c | 38 +++++++++++++++++++++
> >  lib/librte_ip_frag/rte_ipv6_reassembly.c    |  4 +--
> >  lib/librte_port/rte_port_ras.c              |  6 ++--
> >  6 files changed, 59 insertions(+), 19 deletions(-)
> >
> > diff --git a/examples/ip_reassembly/main.c
> b/examples/ip_reassembly/main.c
> > index 17b55d4c7..3a827bd6c 100644
> > --- a/examples/ip_reassembly/main.c
> > +++ b/examples/ip_reassembly/main.c
> > @@ -365,12 +365,14 @@ reassemble(struct rte_mbuf *m, uint16_t portid,
> uint32_t queue,
> >               eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
> >       } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
> >               /* if packet is IPv6 */
> > -             struct ipv6_extension_fragment *frag_hdr;
> > +             const struct ipv6_extension_fragment *frag_hdr;
> > +             struct ipv6_extension_fragment frag_hdr_buf;
> >               struct ipv6_hdr *ip_hdr;
> >
> >               ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
> >
> > -             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
> > +             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(m,
> > +                     ip_hdr, &frag_hdr_buf);
>
> I looked at the patch once again, and it seems incomplete to me.
> Sorry for late comments.
> Yes, right now te_ipv6_frag_get_ipv6_fragment_header can properly
> retrieve ipv6 fragment info, but it is not enough to make things work
> for situation when we have packet with frag header not the first and only
> extension header.
> In the same function, few lines below, we setup l3_len based on that
> assumption:
>
> m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);
>
> mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);
>
> And inside rte_ipv6_frag_reassemble_packet() we still assume the same:
> ...
> frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1);
> ip_hdr->proto = frag_hdr->next_header;
>
> I think we need a function that would allow us to get offset of frag_hdr.
> Actually probably we can have a generic one here, that can return offset
> for
> any requested ext header or total length of ipv6 header.
> Something like that:
>
> struct rte_ipv6_get_xhdr_ofs {
>    uint16_t find_proto;    /* header proto to find */
>    uint16_t  next_proto;  /* next header proto */
>    uint32_t next_ofs;       /* offset to start search */
> };
>
> struct int
> rte_ipv6_get_xhdr_ofs(struct rte_mbuf *pkt, rte_ipv6_get_xhdr_ofs *find);
>
> that would go through ipv6 ext headers till either requested proto is
> found, or end of IPv6 header.
> Then user can do something like that:
>
> /* find fragment extention */
> ipv6_get_xhdr_ofs ofs = {
>         .find_proto = IPPROTO_FRAGMENT,
>         .next_proto = ipv6_hdr->proto,
>         .ofs = sizeof(struct ipv6_hdr),
> };
> rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);
> if(rc == 0)
>      frag_hdr = rte_pktmbuf_mtod_offset(m, .., ofs.ofs);
> ...
>
> /* get size of IPv6 header plus all known extensions */
> ipv6_get_xhdr_ofs ofs = {
>         .find_proto = IPPROTO_MAX,
>         .next_proto = ipv6_hdr->proto,
>         .ofs = sizeof(struct ipv6_hdr),
> };
> rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);
>
>
> >
> >               if (frag_hdr != NULL) {
> >                       struct rte_mbuf *mo;
> > diff --git a/lib/librte_ip_frag/rte_ip_frag.h
> b/lib/librte_ip_frag/rte_ip_frag.h
> > index 7f425f610..6fc8106bc 100644
> > --- a/lib/librte_ip_frag/rte_ip_frag.h
> > +++ b/lib/librte_ip_frag/rte_ip_frag.h
> > @@ -211,28 +211,25 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
> >  struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl
> *tbl,
> >               struct rte_ip_frag_death_row *dr,
> >               struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
> > -             struct ipv6_extension_fragment *frag_hdr);
> > +             const struct ipv6_extension_fragment *frag_hdr);
> >
> >  /**
> >   * Return a pointer to the packet's fragment header, if found.
> > - * It only looks at the extension header that's right after the fixed
> IPv6
> > - * header, and doesn't follow the whole chain of extension headers.
> >   *
> > - * @param hdr
> > + * @param pkt
> > + *   Pointer to the mbuf of the packet.
> > + * @param ip_hdr
> >   *   Pointer to the IPv6 header.
> > + * @param frag_hdr
> > + *   A pointer to the buffer where the fragment header
> > + *   will be copied if it is not contiguous in mbuf data.
> >   * @return
> >   *   Pointer to the IPv6 fragment extension header, or NULL if it's not
> >   *   present.
> >   */
> > -static inline struct ipv6_extension_fragment *
> > -rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
> > -{
> > -     if (hdr->proto == IPPROTO_FRAGMENT) {
> > -             return (struct ipv6_extension_fragment *) ++hdr;
> > -     }
> > -     else
> > -             return NULL;
> > -}
> > +const struct ipv6_extension_fragment
> *rte_ipv6_frag_get_ipv6_fragment_header(
> > +             struct rte_mbuf *pkt, const struct ipv6_hdr *ip_hdr,
> > +             struct ipv6_extension_fragment *frag_hdr);
>
> Another thing - wouldn't it be ab API/ABI breakage?
> One more question - making it non-inline - how much it would  affect
> performance?
> My guess - no big difference, but did you check?
> Konstantin
>
> >
> >  /**
> >   * IPv4 fragmentation.
> > diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map
> b/lib/librte_ip_frag/rte_ip_frag_version.map
> > index d40d5515f..8b4c82d08 100644
> > --- a/lib/librte_ip_frag/rte_ip_frag_version.map
> > +++ b/lib/librte_ip_frag/rte_ip_frag_version.map
> > @@ -8,6 +8,7 @@ DPDK_2.0 {
> >       rte_ipv4_fragment_packet;
> >       rte_ipv6_frag_reassemble_packet;
> >       rte_ipv6_fragment_packet;
> > +     rte_ipv6_frag_get_ipv6_fragment_header;
> >
> >       local: *;
> >  };
> > diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> > index 62a7e4e83..bd847dd3d 100644
> > --- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> > +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> > @@ -176,3 +176,41 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
> >
> >       return out_pkt_pos;
> >  }
> > +
> > +const struct ipv6_extension_fragment *
> > +rte_ipv6_frag_get_ipv6_fragment_header(struct rte_mbuf *pkt,
> > +     const struct ipv6_hdr *ip_hdr,
> > +     struct ipv6_extension_fragment *frag_hdr)
> > +{
> > +     size_t offset = sizeof(struct ipv6_hdr);
> > +     uint8_t nexthdr = ip_hdr->proto;
> > +
> > +     while (ipv6_ext_hdr(nexthdr)) {
> > +             struct ipv6_opt_hdr opt;
> > +             const struct ipv6_opt_hdr *popt = rte_pktmbuf_read(pkt,
> > +                     offset, sizeof(opt), &opt);
> > +             if (popt == NULL)
> > +                     return NULL;
> > +
> > +             switch (nexthdr) {
> > +             case IPPROTO_NONE:
> > +                     return NULL;
> > +
> > +             case IPPROTO_FRAGMENT:
> > +                     return rte_pktmbuf_read(pkt, offset,
> > +                             sizeof(*frag_hdr), frag_hdr);
> > +
> > +             case IPPROTO_AH:
> > +                     offset += (popt->hdrlen + 2) << 2;
> > +                     break;
> > +
> > +             default:
> > +                     offset += (popt->hdrlen + 1) << 3;
> > +                     break;
> > +             }
> > +
> > +             nexthdr = popt->nexthdr;
> > +     }
> > +
> > +     return NULL;
> > +}
> > diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c
> b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> > index db249fe60..b2d67a3f0 100644
> > --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
> > +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> > @@ -135,8 +135,8 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
> >  #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
> >  struct rte_mbuf *
> >  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> > -             struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
> uint64_t tms,
> > -             struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment
> *frag_hdr)
> > +     struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t
> tms,
> > +     struct ipv6_hdr *ip_hdr, const struct ipv6_extension_fragment
> *frag_hdr)
> >  {
> >       struct ip_frag_pkt *fp;
> >       struct ip_frag_key key;
> > diff --git a/lib/librte_port/rte_port_ras.c
> b/lib/librte_port/rte_port_ras.c
> > index c8b2e19bf..28764f744 100644
> > --- a/lib/librte_port/rte_port_ras.c
> > +++ b/lib/librte_port/rte_port_ras.c
> > @@ -184,9 +184,11 @@ process_ipv6(struct rte_port_ring_writer_ras *p,
> struct rte_mbuf *pkt)
> >       /* Assume there is no ethernet header */
> >       struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr
> *);
> >
> > -     struct ipv6_extension_fragment *frag_hdr;
> > +     const struct ipv6_extension_fragment *frag_hdr;
> > +     struct ipv6_extension_fragment frag_hdr_buf;
> >       uint16_t frag_data = 0;
> > -     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
> > +     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt, pkt_hdr,
> > +             &frag_hdr_buf);
> >       if (frag_hdr != NULL)
> >               frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);
> >
> > --
> > 2.17.1
>
>
Ananyev, Konstantin Nov. 7, 2018, 11 p.m. | #4
Hi Cody,

>Hey Konstantin,
>Thanks for reviewing -- I see your point about this patch only removing one of the places where the code makes assumptions about the position of the fragmentation header.

>Unfortunately at the moment I don't have the resources to dedicate to writing the complete solution and doing a performance check, so I think I should withdraw the patch.

Ok, NP that's understandable.
Again I provided my comments quite late.

> I hope it at least serves as a blueprint in case someone comes back to it.

Yes, I think it definitely will be useful.

>I might be able to come back to it eventually, but likely not soon.

Hopefully you'll will :)
Thanks
Konstantin

> 
> Add the ability to parse IPv6 extenders to find the
> IPv6 fragment header, and update callers.
> 
> According to RFC 8200, there is no guarantee that the IPv6
> Fragment extension header will come before any other extension
> header, even though it is recommended.
> 
> Signed-off-by: Cody Doucette <doucette@bu.edu>
> Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
> Reviewed-by: Michel Machado <michel@digirati.com.br>
> ---
>  examples/ip_reassembly/main.c               |  6 ++--
>  lib/librte_ip_frag/rte_ip_frag.h            | 23 ++++++-------
>  lib/librte_ip_frag/rte_ip_frag_version.map  |  1 +
>  lib/librte_ip_frag/rte_ipv6_fragmentation.c | 38 +++++++++++++++++++++
>  lib/librte_ip_frag/rte_ipv6_reassembly.c    |  4 +--
>  lib/librte_port/rte_port_ras.c              |  6 ++--
>  6 files changed, 59 insertions(+), 19 deletions(-)
> 
> diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
> index 17b55d4c7..3a827bd6c 100644
> --- a/examples/ip_reassembly/main.c
> +++ b/examples/ip_reassembly/main.c
> @@ -365,12 +365,14 @@ reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
>               eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
>       } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
>               /* if packet is IPv6 */
> -             struct ipv6_extension_fragment *frag_hdr;
> +             const struct ipv6_extension_fragment *frag_hdr;
> +             struct ipv6_extension_fragment frag_hdr_buf;
>               struct ipv6_hdr *ip_hdr;
> 
>               ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
> 
> -             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
> +             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(m,
> +                     ip_hdr, &frag_hdr_buf);

I looked at the patch once again, and it seems incomplete to me.
Sorry for late comments.
Yes, right now te_ipv6_frag_get_ipv6_fragment_header can properly
retrieve ipv6 fragment info, but it is not enough to make things work
for situation when we have packet with frag header not the first and only
extension header.
In the same function, few lines below, we setup l3_len based on that assumption:

m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);

mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);

And inside rte_ipv6_frag_reassemble_packet() we still assume the same:
...
frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1);
ip_hdr->proto = frag_hdr->next_header;

I think we need a function that would allow us to get offset of frag_hdr. 
Actually probably we can have a generic one here, that can return offset for
any requested ext header or total length of ipv6 header.
Something like that:

struct rte_ipv6_get_xhdr_ofs {
   uint16_t find_proto;    /* header proto to find */
   uint16_t  next_proto;  /* next header proto */
   uint32_t next_ofs;       /* offset to start search */
};

struct int 
rte_ipv6_get_xhdr_ofs(struct rte_mbuf *pkt, rte_ipv6_get_xhdr_ofs *find); 

that would go through ipv6 ext headers till either requested proto is found, or end of IPv6 header. 
Then user can do something like that:

/* find fragment extention */
ipv6_get_xhdr_ofs ofs = {
        .find_proto = IPPROTO_FRAGMENT,
        .next_proto = ipv6_hdr->proto,
        .ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);
if(rc == 0) 
     frag_hdr = rte_pktmbuf_mtod_offset(m, .., ofs.ofs);
...     

/* get size of IPv6 header plus all known extensions */
ipv6_get_xhdr_ofs ofs = {
        .find_proto = IPPROTO_MAX,
        .next_proto = ipv6_hdr->proto,
        .ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);     


> 
>               if (frag_hdr != NULL) {
>                       struct rte_mbuf *mo;
> diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
> index 7f425f610..6fc8106bc 100644
> --- a/lib/librte_ip_frag/rte_ip_frag.h
> +++ b/lib/librte_ip_frag/rte_ip_frag.h
> @@ -211,28 +211,25 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
>  struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>               struct rte_ip_frag_death_row *dr,
>               struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
> -             struct ipv6_extension_fragment *frag_hdr);
> +             const struct ipv6_extension_fragment *frag_hdr);
> 
>  /**
>   * Return a pointer to the packet's fragment header, if found.
> - * It only looks at the extension header that's right after the fixed IPv6
> - * header, and doesn't follow the whole chain of extension headers.
>   *
> - * @param hdr
> + * @param pkt
> + *   Pointer to the mbuf of the packet.
> + * @param ip_hdr
>   *   Pointer to the IPv6 header.
> + * @param frag_hdr
> + *   A pointer to the buffer where the fragment header
> + *   will be copied if it is not contiguous in mbuf data.
>   * @return
>   *   Pointer to the IPv6 fragment extension header, or NULL if it's not
>   *   present.
>   */
> -static inline struct ipv6_extension_fragment *
> -rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
> -{
> -     if (hdr->proto == IPPROTO_FRAGMENT) {
> -             return (struct ipv6_extension_fragment *) ++hdr;
> -     }
> -     else
> -             return NULL;
> -}
> +const struct ipv6_extension_fragment *rte_ipv6_frag_get_ipv6_fragment_header(
> +             struct rte_mbuf *pkt, const struct ipv6_hdr *ip_hdr,
> +             struct ipv6_extension_fragment *frag_hdr);

Another thing - wouldn't it be ab API/ABI breakage?
One more question - making it non-inline - how much it would  affect performance?
My guess - no big difference, but did you check?
Konstantin

> 
>  /**
>   * IPv4 fragmentation.
> diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map
> index d40d5515f..8b4c82d08 100644
> --- a/lib/librte_ip_frag/rte_ip_frag_version.map
> +++ b/lib/librte_ip_frag/rte_ip_frag_version.map
> @@ -8,6 +8,7 @@ DPDK_2.0 {
>       rte_ipv4_fragment_packet;
>       rte_ipv6_frag_reassemble_packet;
>       rte_ipv6_fragment_packet;
> +     rte_ipv6_frag_get_ipv6_fragment_header;
> 
>       local: *;
>  };
> diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> index 62a7e4e83..bd847dd3d 100644
> --- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> @@ -176,3 +176,41 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
> 
>       return out_pkt_pos;
>  }
> +
> +const struct ipv6_extension_fragment *
> +rte_ipv6_frag_get_ipv6_fragment_header(struct rte_mbuf *pkt,
> +     const struct ipv6_hdr *ip_hdr,
> +     struct ipv6_extension_fragment *frag_hdr)
> +{
> +     size_t offset = sizeof(struct ipv6_hdr);
> +     uint8_t nexthdr = ip_hdr->proto;
> +
> +     while (ipv6_ext_hdr(nexthdr)) {
> +             struct ipv6_opt_hdr opt;
> +             const struct ipv6_opt_hdr *popt = rte_pktmbuf_read(pkt,
> +                     offset, sizeof(opt), &opt);
> +             if (popt == NULL)
> +                     return NULL;
> +
> +             switch (nexthdr) {
> +             case IPPROTO_NONE:
> +                     return NULL;
> +
> +             case IPPROTO_FRAGMENT:
> +                     return rte_pktmbuf_read(pkt, offset,
> +                             sizeof(*frag_hdr), frag_hdr);
> +
> +             case IPPROTO_AH:
> +                     offset += (popt->hdrlen + 2) << 2;
> +                     break;
> +
> +             default:
> +                     offset += (popt->hdrlen + 1) << 3;
> +                     break;
> +             }
> +
> +             nexthdr = popt->nexthdr;
> +     }
> +
> +     return NULL;
> +}
> diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> index db249fe60..b2d67a3f0 100644
> --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> @@ -135,8 +135,8 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
>  struct rte_mbuf *
>  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> -             struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> -             struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
> +     struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> +     struct ipv6_hdr *ip_hdr, const struct ipv6_extension_fragment *frag_hdr)
>  {
>       struct ip_frag_pkt *fp;
>       struct ip_frag_key key;
> diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
> index c8b2e19bf..28764f744 100644
> --- a/lib/librte_port/rte_port_ras.c
> +++ b/lib/librte_port/rte_port_ras.c
> @@ -184,9 +184,11 @@ process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
>       /* Assume there is no ethernet header */
>       struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
> 
> -     struct ipv6_extension_fragment *frag_hdr;
> +     const struct ipv6_extension_fragment *frag_hdr;
> +     struct ipv6_extension_fragment frag_hdr_buf;
>       uint16_t frag_data = 0;
> -     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
> +     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt, pkt_hdr,
> +             &frag_hdr_buf);
>       if (frag_hdr != NULL)
>               frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);
> 
> --
> 2.17.1

Patch

diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
index 17b55d4c7..3a827bd6c 100644
--- a/examples/ip_reassembly/main.c
+++ b/examples/ip_reassembly/main.c
@@ -365,12 +365,14 @@  reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
 		eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
 	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
 		/* if packet is IPv6 */
-		struct ipv6_extension_fragment *frag_hdr;
+		const struct ipv6_extension_fragment *frag_hdr;
+		struct ipv6_extension_fragment frag_hdr_buf;
 		struct ipv6_hdr *ip_hdr;
 
 		ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
 
-		frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
+		frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(m,
+			ip_hdr, &frag_hdr_buf);
 
 		if (frag_hdr != NULL) {
 			struct rte_mbuf *mo;
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 7f425f610..6fc8106bc 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -211,28 +211,25 @@  rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
 		struct rte_ip_frag_death_row *dr,
 		struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
-		struct ipv6_extension_fragment *frag_hdr);
+		const struct ipv6_extension_fragment *frag_hdr);
 
 /**
  * Return a pointer to the packet's fragment header, if found.
- * It only looks at the extension header that's right after the fixed IPv6
- * header, and doesn't follow the whole chain of extension headers.
  *
- * @param hdr
+ * @param pkt
+ *   Pointer to the mbuf of the packet.
+ * @param ip_hdr
  *   Pointer to the IPv6 header.
+ * @param frag_hdr
+ *   A pointer to the buffer where the fragment header
+ *   will be copied if it is not contiguous in mbuf data.
  * @return
  *   Pointer to the IPv6 fragment extension header, or NULL if it's not
  *   present.
  */
-static inline struct ipv6_extension_fragment *
-rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
-{
-	if (hdr->proto == IPPROTO_FRAGMENT) {
-		return (struct ipv6_extension_fragment *) ++hdr;
-	}
-	else
-		return NULL;
-}
+const struct ipv6_extension_fragment *rte_ipv6_frag_get_ipv6_fragment_header(
+		struct rte_mbuf *pkt, const struct ipv6_hdr *ip_hdr,
+		struct ipv6_extension_fragment *frag_hdr);
 
 /**
  * IPv4 fragmentation.
diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map
index d40d5515f..8b4c82d08 100644
--- a/lib/librte_ip_frag/rte_ip_frag_version.map
+++ b/lib/librte_ip_frag/rte_ip_frag_version.map
@@ -8,6 +8,7 @@  DPDK_2.0 {
 	rte_ipv4_fragment_packet;
 	rte_ipv6_frag_reassemble_packet;
 	rte_ipv6_fragment_packet;
+	rte_ipv6_frag_get_ipv6_fragment_header;
 
 	local: *;
 };
diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
index 62a7e4e83..bd847dd3d 100644
--- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -176,3 +176,41 @@  rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 
 	return out_pkt_pos;
 }
+
+const struct ipv6_extension_fragment *
+rte_ipv6_frag_get_ipv6_fragment_header(struct rte_mbuf *pkt,
+	const struct ipv6_hdr *ip_hdr,
+	struct ipv6_extension_fragment *frag_hdr)
+{
+	size_t offset = sizeof(struct ipv6_hdr);
+	uint8_t nexthdr = ip_hdr->proto;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr opt;
+		const struct ipv6_opt_hdr *popt = rte_pktmbuf_read(pkt,
+			offset, sizeof(opt), &opt);
+		if (popt == NULL)
+			return NULL;
+
+		switch (nexthdr) {
+		case IPPROTO_NONE:
+			return NULL;
+
+		case IPPROTO_FRAGMENT:
+			return rte_pktmbuf_read(pkt, offset,
+				sizeof(*frag_hdr), frag_hdr);
+
+		case IPPROTO_AH:
+			offset += (popt->hdrlen + 2) << 2;
+			break;
+
+		default:
+			offset += (popt->hdrlen + 1) << 3;
+			break;
+		}
+
+		nexthdr = popt->nexthdr;
+	}
+
+	return NULL;
+}
diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
index db249fe60..b2d67a3f0 100644
--- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -135,8 +135,8 @@  ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
 struct rte_mbuf *
 rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
-		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
-		struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
+	struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+	struct ipv6_hdr *ip_hdr, const struct ipv6_extension_fragment *frag_hdr)
 {
 	struct ip_frag_pkt *fp;
 	struct ip_frag_key key;
diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
index c8b2e19bf..28764f744 100644
--- a/lib/librte_port/rte_port_ras.c
+++ b/lib/librte_port/rte_port_ras.c
@@ -184,9 +184,11 @@  process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
 	/* Assume there is no ethernet header */
 	struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
 
-	struct ipv6_extension_fragment *frag_hdr;
+	const struct ipv6_extension_fragment *frag_hdr;
+	struct ipv6_extension_fragment frag_hdr_buf;
 	uint16_t frag_data = 0;
-	frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
+	frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt, pkt_hdr,
+		&frag_hdr_buf);
 	if (frag_hdr != NULL)
 		frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);