[RFC,8/8] ip_frag: fix gcc-12 warnings

Message ID 20220607171746.461772-9-stephen@networkplumber.org (mailing list archive)
State Rejected, archived
Delegated to: David Marchand
Headers
Series Gcc-12 warning fixes |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues
ci/intel-Testing success Testing PASS

Commit Message

Stephen Hemminger June 7, 2022, 5:17 p.m. UTC
  The function rte_memcpy can derference past source buffer which
will cause array out of bounds warnings. But there is no good reason
to use rte_memcpy instead of memcpy in this code. Memcpy is just
as fast for these small inputs, and compiler will optimize.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/ip_frag/rte_ipv4_fragmentation.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
  

Comments

Konstantin Ananyev June 8, 2022, 8:19 a.m. UTC | #1
07/06/2022 18:17, Stephen Hemminger пишет:
> The function rte_memcpy can derference past source buffer which
> will cause array out of bounds warnings. But there is no good reason
> to use rte_memcpy instead of memcpy in this code. Memcpy is just
> as fast for these small inputs, and compiler will optimize.


AFAIK, rte_memcpy() will outperform memcpy() when _size_ parameter
is a variable. Unfortunately that's exactly the case here.
So not sure it is a good change, at least without extensive perf testing.
BTW, if rte_memcpy() really access src buffer beyond it's boundaries,
I think that's definitely a bug that needs to be fixed.


> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>   lib/ip_frag/rte_ipv4_fragmentation.c | 7 +++----
>   1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c b/lib/ip_frag/rte_ipv4_fragmentation.c
> index a19f6fda6408..27a8ad224dec 100644
> --- a/lib/ip_frag/rte_ipv4_fragmentation.c
> +++ b/lib/ip_frag/rte_ipv4_fragmentation.c
> @@ -5,7 +5,6 @@
>   #include <stddef.h>
>   #include <errno.h>
>   
> -#include <rte_memcpy.h>
>   #include <rte_ether.h>
>   
>   #include "ip_frag_common.h"
> @@ -26,7 +25,7 @@ static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
>   		const struct rte_ipv4_hdr *src, uint16_t header_len,
>   		uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
>   {
> -	rte_memcpy(dst, src, header_len);
> +	memcpy(dst, src, header_len);
>   	fofs = (uint16_t)(fofs + (dofs >> RTE_IPV4_HDR_FO_SHIFT));
>   	fofs = (uint16_t)(fofs | mf << RTE_IPV4_HDR_MF_SHIFT);
>   	dst->fragment_offset = rte_cpu_to_be_16(fofs);
> @@ -48,7 +47,7 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
>   	struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
>   
>   	ipopt_len = 0;
> -	rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
> +	memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
>   	ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
>   
>   	uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
> @@ -65,7 +64,7 @@ static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
>   			break;
>   
>   		if (RTE_IPV4_HDR_OPT_COPIED(*p_opt)) {
> -			rte_memcpy(ipopt_frag_hdr + ipopt_len,
> +			memcpy(ipopt_frag_hdr + ipopt_len,
>   				p_opt, p_opt[1]);
>   			ipopt_len += p_opt[1];
>   		}
  
Stephen Hemminger June 8, 2022, 3:26 p.m. UTC | #2
On Wed, 8 Jun 2022 09:19:20 +0100
Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> wrote:

> 07/06/2022 18:17, Stephen Hemminger пишет:
> > The function rte_memcpy can derference past source buffer which
> > will cause array out of bounds warnings. But there is no good reason
> > to use rte_memcpy instead of memcpy in this code. Memcpy is just
> > as fast for these small inputs, and compiler will optimize.  
> 
> 
> AFAIK, rte_memcpy() will outperform memcpy() when _size_ parameter
> is a variable. Unfortunately that's exactly the case here.
> So not sure it is a good change, at least without extensive perf testing.
> BTW, if rte_memcpy() really access src buffer beyond it's boundaries,
> I think that's definitely a bug that needs to be fixed.

Yes and no.
IMHO DPDK should not in the C library business, and glibc etc should be
more optimized if necessary.


The ip_frag warning with rte_memcpy in full is:

[296/3606] Compiling C object lib/libr...a.p/ip_frag_rte_ipv4_fragmentation.c.o
In file included from /usr/lib/gcc/x86_64-linux-gnu/12/include/immintrin.h:43,
                 from /usr/lib/gcc/x86_64-linux-gnu/12/include/x86intrin.h:32,
                 from ../lib/eal/x86/include/rte_vect.h:31,
                 from ../lib/eal/x86/include/rte_memcpy.h:17,
                 from ../lib/ip_frag/rte_ipv4_fragmentation.c:8:
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_mov128’ at ../lib/eal/x86/include/rte_memcpy.h:369:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:445:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript ‘__m256i_u[1]’ is partly outside array bounds of ‘uint8_t[60]’ {aka ‘unsigned char[60]’} [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [52, 60] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_mov128’ at ../lib/eal/x86/include/rte_memcpy.h:370:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:445:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript [2, 3] is outside array bounds of ‘uint8_t[60]’ {aka ‘unsigned char[60]’} [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [84, 124] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_mov128’ at ../lib/eal/x86/include/rte_memcpy.h:371:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:445:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript [3, 4] is outside array bounds of ‘uint8_t[60]’ {aka ‘unsigned char[60]’} [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [116, 156] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_mov64’ at ../lib/eal/x86/include/rte_memcpy.h:358:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:452:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript ‘__m256i_u[1]’ is partly outside array bounds of ‘void[60]’ [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [180, 240] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [52, 60] into object ‘ipopt_frag_hdr’ of size 60
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:457:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript [2, 7] is outside array bounds of ‘void[60]’ [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [148, 272] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [148, 272] into object ‘ipopt_frag_hdr’ of size 60
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [20, 60] into object ‘ipopt_frag_hdr’ of size 60
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:458:4,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript [2, 8] is outside array bounds of ‘void[60]’ [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [149, 273] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
      |                 ^~~~~~~~~~~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [149, 273] into object ‘ipopt_frag_hdr’ of size 60
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [21, 60] into object ‘ipopt_frag_hdr’ of size 60
In function ‘_mm256_storeu_si256’,
    inlined from ‘rte_mov32’ at ../lib/eal/x86/include/rte_memcpy.h:347:2,
    inlined from ‘rte_memcpy_generic’ at ../lib/eal/x86/include/rte_memcpy.h:438:3,
    inlined from ‘rte_memcpy’ at ../lib/eal/x86/include/rte_memcpy.h:853:10,
    inlined from ‘__create_ipopt_frag_hdr’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:68:4,
    inlined from ‘rte_ipv4_fragment_packet’ at ../lib/ip_frag/rte_ipv4_fragmentation.c:242:16:
/usr/lib/gcc/x86_64-linux-gnu/12/include/avxintrin.h:935:8: warning: array subscript ‘__m256i_u[1]’ is partly outside array bounds of ‘uint8_t[60]’ {aka ‘unsigned char[60]’} [-Warray-bounds]
  935 |   *__P = __A;
      |   ~~~~~^~~~~
../lib/ip_frag/rte_ipv4_fragmentation.c: In function ‘rte_ipv4_fragment_packet’:
../lib/ip_frag/rte_ipv4_fragmentation.c:122:17: note: at offset [37, 60] into object ‘ipopt_frag_hdr’ of size 60
  122 |         uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
  
Morten Brørup June 9, 2022, 7:09 a.m. UTC | #3
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, 8 June 2022 17.27
> 
> On Wed, 8 Jun 2022 09:19:20 +0100
> Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> wrote:
> 
> > 07/06/2022 18:17, Stephen Hemminger пишет:
> > > The function rte_memcpy can derference past source buffer which
> > > will cause array out of bounds warnings. But there is no good
> reason
> > > to use rte_memcpy instead of memcpy in this code. Memcpy is just
> > > as fast for these small inputs, and compiler will optimize.
> >
> >
> > AFAIK, rte_memcpy() will outperform memcpy() when _size_ parameter
> > is a variable. Unfortunately that's exactly the case here.
> > So not sure it is a good change, at least without extensive perf
> testing.
> > BTW, if rte_memcpy() really access src buffer beyond it's boundaries,
> > I think that's definitely a bug that needs to be fixed.
> 
> Yes and no.
> IMHO DPDK should not in the C library business, and glibc etc should be
> more optimized if necessary.

A very big +1 to that!

DPDK contains a lot of optimizations that really belong in the compiler and/or C library, but weren't back then, so the clever DPDK developers put them inside DPDK instead.

Over time, the compilers and C libraries have improved, and many of these manually implemented optimizations have become obsolete. They should be cleaned up and replaced by simpler code, and the documentation about optimizing code should be updated accordingly.

Until that happens, we have to expect contributors to use rte_memcpy() and other obsolete optimizations - they are only doing what the DPDK documentation and reference code tells them. Just like application developers are using KNI, because it is so heavily promoted in DPDK documentation.

The DPDK community has a very high focus on the risk of performance regressions when touching DPDK Core libraries, so a general cleaning is probably not going to happen. Luckily, there are exceptions to every rule, such as Georg Sauthoff's patch removing the manual loop unroll in __rte_raw_cksum() [1], which allowed the compiler to generate something better.

I guess that "if it isn't broken, don't fix it" applies to DPDK Core libraries too. ;-)


PS: A funny example of an exotic optimization is the use of Duff's Device in rte_pktmbuf_alloc_bulk() [2]; a C implementation of an optimization for assembler code.

[1] http://inbox.dpdk.org/dev/20211017203718.801998-2-mail@gms.tf/
[2] https://elixir.bootlin.com/dpdk/latest/source/lib/mbuf/rte_mbuf.h#L893
  
Thomas Monjalon June 14, 2022, 9:20 p.m. UTC | #4
09/06/2022 09:09, Morten Brørup:
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Wednesday, 8 June 2022 17.27
> > 
> > On Wed, 8 Jun 2022 09:19:20 +0100
> > Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> wrote:
> > 
> > > 07/06/2022 18:17, Stephen Hemminger пишет:
> > > > The function rte_memcpy can derference past source buffer which
> > > > will cause array out of bounds warnings. But there is no good
> > reason
> > > > to use rte_memcpy instead of memcpy in this code. Memcpy is just
> > > > as fast for these small inputs, and compiler will optimize.
> > >
> > >
> > > AFAIK, rte_memcpy() will outperform memcpy() when _size_ parameter
> > > is a variable. Unfortunately that's exactly the case here.
> > > So not sure it is a good change, at least without extensive perf
> > testing.
> > > BTW, if rte_memcpy() really access src buffer beyond it's boundaries,
> > > I think that's definitely a bug that needs to be fixed.
> > 
> > Yes and no.
> > IMHO DPDK should not in the C library business, and glibc etc should be
> > more optimized if necessary.
> 
> A very big +1 to that!
> 
> DPDK contains a lot of optimizations that really belong in the compiler and/or C library, but weren't back then, so the clever DPDK developers put them inside DPDK instead.
> 
> Over time, the compilers and C libraries have improved, and many of these manually implemented optimizations have become obsolete. They should be cleaned up and replaced by simpler code, and the documentation about optimizing code should be updated accordingly.
> 
> Until that happens, we have to expect contributors to use rte_memcpy() and other obsolete optimizations - they are only doing what the DPDK documentation and reference code tells them. Just like application developers are using KNI, because it is so heavily promoted in DPDK documentation.
> 
> The DPDK community has a very high focus on the risk of performance regressions when touching DPDK Core libraries, so a general cleaning is probably not going to happen. Luckily, there are exceptions to every rule, such as Georg Sauthoff's patch removing the manual loop unroll in __rte_raw_cksum() [1], which allowed the compiler to generate something better.
> 
> I guess that "if it isn't broken, don't fix it" applies to DPDK Core libraries too. ;-)

No it doesn't apply, the only limitation is the number of contributions.
Feel free to propose cleanups.
  

Patch

diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c b/lib/ip_frag/rte_ipv4_fragmentation.c
index a19f6fda6408..27a8ad224dec 100644
--- a/lib/ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/ip_frag/rte_ipv4_fragmentation.c
@@ -5,7 +5,6 @@ 
 #include <stddef.h>
 #include <errno.h>
 
-#include <rte_memcpy.h>
 #include <rte_ether.h>
 
 #include "ip_frag_common.h"
@@ -26,7 +25,7 @@  static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
 		const struct rte_ipv4_hdr *src, uint16_t header_len,
 		uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
 {
-	rte_memcpy(dst, src, header_len);
+	memcpy(dst, src, header_len);
 	fofs = (uint16_t)(fofs + (dofs >> RTE_IPV4_HDR_FO_SHIFT));
 	fofs = (uint16_t)(fofs | mf << RTE_IPV4_HDR_MF_SHIFT);
 	dst->fragment_offset = rte_cpu_to_be_16(fofs);
@@ -48,7 +47,7 @@  static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
 	struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
 
 	ipopt_len = 0;
-	rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
+	memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
 	ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
 
 	uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
@@ -65,7 +64,7 @@  static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
 			break;
 
 		if (RTE_IPV4_HDR_OPT_COPIED(*p_opt)) {
-			rte_memcpy(ipopt_frag_hdr + ipopt_len,
+			memcpy(ipopt_frag_hdr + ipopt_len,
 				p_opt, p_opt[1]);
 			ipopt_len += p_opt[1];
 		}