[v2,5/5] eal/x86: force inlining of all memcpy and mov helpers

Message ID 20190517150613.13310-6-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: I-cache pressure optimizations |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Maxime Coquelin May 17, 2019, 3:06 p.m. UTC
  Some helpers in the header file are forced inlined other are
only inlined, this patch forces inline for all.

It will avoid it to be embedded as functions when called multiple
times in the same object file. For example, when we added packed
ring support in vhost-user library, rte_memcpy_generic got no
more inlined.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 .../common/include/arch/x86/rte_memcpy.h       | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
  

Comments

David Marchand May 20, 2019, 8:30 a.m. UTC | #1
On Fri, May 17, 2019 at 5:14 PM Maxime Coquelin <maxime.coquelin@redhat.com>
wrote:

> Some helpers in the header file are forced inlined other are
> only inlined, this patch forces inline for all.
>
> It will avoid it to be embedded as functions when called multiple
> times in the same object file. For example, when we added packed
> ring support in vhost-user library, rte_memcpy_generic got no
> more inlined.
>

Weird that we have only some functions marked as always inlined in commit:
https://git.dpdk.org/dpdk/commit/?id=1c9467a6efd8d85b5bbbf7004a4407cae2d09431

Bruce, is there a reason for this?
  

Patch

diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index 7b758094df..ba44c4a328 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -115,7 +115,7 @@  rte_mov256(uint8_t *dst, const uint8_t *src)
  * Copy 128-byte blocks from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 {
 	__m512i zmm0, zmm1;
@@ -163,7 +163,7 @@  rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
 	}
 }
 
-static inline void *
+static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
 	uintptr_t dstu = (uintptr_t)dst;
@@ -330,7 +330,7 @@  rte_mov64(uint8_t *dst, const uint8_t *src)
  * Copy 128 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov128(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
@@ -343,7 +343,7 @@  rte_mov128(uint8_t *dst, const uint8_t *src)
  * Copy 128-byte blocks from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 {
 	__m256i ymm0, ymm1, ymm2, ymm3;
@@ -363,7 +363,7 @@  rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 	}
 }
 
-static inline void *
+static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
 	uintptr_t dstu = (uintptr_t)dst;
@@ -523,7 +523,7 @@  rte_mov64(uint8_t *dst, const uint8_t *src)
  * Copy 128 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov128(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -655,7 +655,7 @@  __extension__ ({                                                      \
     }                                                                 \
 })
 
-static inline void *
+static __rte_always_inline void *
 rte_memcpy_generic(void *dst, const void *src, size_t n)
 {
 	__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
@@ -800,7 +800,7 @@  rte_memcpy_generic(void *dst, const void *src, size_t n)
 
 #endif /* RTE_MACHINE_CPUFLAG */
 
-static inline void *
+static __rte_always_inline void *
 rte_memcpy_aligned(void *dst, const void *src, size_t n)
 {
 	void *ret = dst;
@@ -860,7 +860,7 @@  rte_memcpy_aligned(void *dst, const void *src, size_t n)
 	return ret;
 }
 
-static inline void *
+static __rte_always_inline void *
 rte_memcpy(void *dst, const void *src, size_t n)
 {
 	if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK))