[v5,01/39] eal: use C11 alignas

Message ID 1708715054-22386-2-git-send-email-roretzla@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series use C11 alignas |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Tyler Retzlaff Feb. 23, 2024, 7:03 p.m. UTC
The current location used for __rte_aligned(a) for alignment of types
and variables is not compatible with MSVC. There is only a single
location accepted by both toolchains.

For variables standard C11 offers alignas(a) supported by conformant
compilers i.e. both MSVC and GCC.

For types the standard offers no alignment facility that compatibly
interoperates with C and C++ but may be achieved by relocating the
placement of __rte_aligned(a) to the aforementioned location accepted
by all currently supported toolchains.

To allow alignment for both compilers do the following:

* Expand __rte_aligned(a) to __declspec(align(a)) when building
  with MSVC.

* Move __rte_aligned from the end of {struct,union} definitions to
  be between {struct,union} and tag.

  The placement between {struct,union} and the tag allows the desired
  alignment to be imparted on the type regardless of the toolchain being
  used for all of GCC, LLVM, MSVC compilers building both C and C++.

* Replace use of __rte_aligned(a) on variables/fields with alignas(a).

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 lib/eal/arm/include/rte_vect.h       |  4 ++--
 lib/eal/common/malloc_elem.h         |  4 ++--
 lib/eal/common/malloc_heap.h         |  4 ++--
 lib/eal/common/rte_keepalive.c       |  3 ++-
 lib/eal/common/rte_random.c          |  4 ++--
 lib/eal/common/rte_service.c         |  8 ++++----
 lib/eal/include/generic/rte_atomic.h |  4 ++--
 lib/eal/include/rte_common.h         | 23 +++++++++++++++--------
 lib/eal/loongarch/include/rte_vect.h |  8 ++++----
 lib/eal/ppc/include/rte_vect.h       |  4 ++--
 lib/eal/riscv/include/rte_vect.h     |  4 ++--
 lib/eal/x86/include/rte_vect.h       |  4 ++--
 lib/eal/x86/rte_power_intrinsics.c   | 10 ++++++----
 13 files changed, 47 insertions(+), 37 deletions(-)
  

Comments

Bruce Richardson Feb. 26, 2024, 11:13 a.m. UTC | #1
On Fri, Feb 23, 2024 at 11:03:36AM -0800, Tyler Retzlaff wrote:
> The current location used for __rte_aligned(a) for alignment of types
> and variables is not compatible with MSVC. There is only a single
> location accepted by both toolchains.
> 
> For variables standard C11 offers alignas(a) supported by conformant
> compilers i.e. both MSVC and GCC.
> 
> For types the standard offers no alignment facility that compatibly
> interoperates with C and C++ but may be achieved by relocating the
> placement of __rte_aligned(a) to the aforementioned location accepted
> by all currently supported toolchains.
> 
> To allow alignment for both compilers do the following:
> 
> * Expand __rte_aligned(a) to __declspec(align(a)) when building
>   with MSVC.
> 
> * Move __rte_aligned from the end of {struct,union} definitions to
>   be between {struct,union} and tag.
> 
>   The placement between {struct,union} and the tag allows the desired
>   alignment to be imparted on the type regardless of the toolchain being
>   used for all of GCC, LLVM, MSVC compilers building both C and C++.
> 
> * Replace use of __rte_aligned(a) on variables/fields with alignas(a).
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  lib/eal/arm/include/rte_vect.h       |  4 ++--
>  lib/eal/common/malloc_elem.h         |  4 ++--
>  lib/eal/common/malloc_heap.h         |  4 ++--
>  lib/eal/common/rte_keepalive.c       |  3 ++-
>  lib/eal/common/rte_random.c          |  4 ++--
>  lib/eal/common/rte_service.c         |  8 ++++----
>  lib/eal/include/generic/rte_atomic.h |  4 ++--
>  lib/eal/include/rte_common.h         | 23 +++++++++++++++--------
>  lib/eal/loongarch/include/rte_vect.h |  8 ++++----
>  lib/eal/ppc/include/rte_vect.h       |  4 ++--
>  lib/eal/riscv/include/rte_vect.h     |  4 ++--
>  lib/eal/x86/include/rte_vect.h       |  4 ++--
>  lib/eal/x86/rte_power_intrinsics.c   | 10 ++++++----
>  13 files changed, 47 insertions(+), 37 deletions(-)
> 
Just to chime in with one additional benefit of this change - it will
prevent static analysers, IDEs and doxygen[1] from ever mistaking the
__rte_aligned tag at the end of a struct define as being a variable
definition. In the absence of a macro definition from our DPDK header files
this defines a variable called __rte_cache_aligned (as in [1]!):

   struct xyz {
   } __rte_cache_aligned;

while this just gives an error to let you know the definiton is missing:

  struct __rte_cache_aligned xyz {
  };

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

[1] http://doc.dpdk.org/api/structrte__ring.html#a43d0b019eced25dc6c357f3b4f0f47e5
  

Patch

diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
index 8cfe4bd..c97d299 100644
--- a/lib/eal/arm/include/rte_vect.h
+++ b/lib/eal/arm/include/rte_vect.h
@@ -24,14 +24,14 @@ 
 #define	XMM_SIZE	(sizeof(xmm_t))
 #define	XMM_MASK	(XMM_SIZE - 1)
 
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
 	xmm_t    x;
 	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
 /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
index 952ce73..c7ff671 100644
--- a/lib/eal/common/malloc_elem.h
+++ b/lib/eal/common/malloc_elem.h
@@ -20,7 +20,7 @@  enum elem_state {
 	ELEM_PAD  /* element is a padding-only header */
 };
 
-struct malloc_elem {
+struct __rte_cache_aligned malloc_elem {
 	struct malloc_heap *heap;
 	struct malloc_elem *volatile prev;
 	/**< points to prev elem in memseg */
@@ -48,7 +48,7 @@  struct malloc_elem {
 	size_t user_size;
 	uint64_t asan_cookie[2]; /* must be next to header_cookie */
 #endif
-} __rte_cache_aligned;
+};
 
 static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
 
diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
index 8f3ab57..0c49588 100644
--- a/lib/eal/common/malloc_heap.h
+++ b/lib/eal/common/malloc_heap.h
@@ -21,7 +21,7 @@ 
 /**
  * Structure to hold malloc heap
  */
-struct malloc_heap {
+struct __rte_cache_aligned malloc_heap {
 	rte_spinlock_t lock;
 	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
 	struct malloc_elem *volatile first;
@@ -31,7 +31,7 @@  struct malloc_heap {
 	unsigned int socket_id;
 	size_t total_size;
 	char name[RTE_HEAP_NAME_MAX_LEN];
-} __rte_cache_aligned;
+};
 
 void *
 malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
index f6db973..391c1be 100644
--- a/lib/eal/common/rte_keepalive.c
+++ b/lib/eal/common/rte_keepalive.c
@@ -2,6 +2,7 @@ 
  * Copyright(c) 2015-2016 Intel Corporation
  */
 
+#include <stdalign.h>
 #include <inttypes.h>
 
 #include <rte_common.h>
@@ -19,7 +20,7 @@  struct rte_keepalive {
 		/*
 		 * Each element must be cache aligned to prevent false sharing.
 		 */
-		enum rte_keepalive_state core_state __rte_cache_aligned;
+		alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state core_state;
 	} live_data[RTE_KEEPALIVE_MAXCORES];
 
 	/** Last-seen-alive timestamps */
diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 7709b8f..90e91b3 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -13,14 +13,14 @@ 
 #include <rte_lcore.h>
 #include <rte_random.h>
 
-struct rte_rand_state {
+struct __rte_cache_aligned rte_rand_state {
 	uint64_t z1;
 	uint64_t z2;
 	uint64_t z3;
 	uint64_t z4;
 	uint64_t z5;
 	RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
 
 /* One instance each for every lcore id-equipped thread, and one
  * additional instance to be shared by all others threads (i.e., all
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d959c91..5637993 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -32,7 +32,7 @@ 
 #define RUNSTATE_RUNNING 1
 
 /* internal representation of a service */
-struct rte_service_spec_impl {
+struct __rte_cache_aligned rte_service_spec_impl {
 	/* public part of the struct */
 	struct rte_service_spec spec;
 
@@ -53,7 +53,7 @@  struct rte_service_spec_impl {
 	 * on currently.
 	 */
 	RTE_ATOMIC(uint32_t) num_mapped_cores;
-} __rte_cache_aligned;
+};
 
 struct service_stats {
 	RTE_ATOMIC(uint64_t) calls;
@@ -61,7 +61,7 @@  struct service_stats {
 };
 
 /* the internal values of a service core */
-struct core_state {
+struct __rte_cache_aligned core_state {
 	/* map of services IDs are run on this core */
 	uint64_t service_mask;
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
@@ -71,7 +71,7 @@  struct core_state {
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
-} __rte_cache_aligned;
+};
 
 static uint32_t rte_service_count;
 static struct rte_service_spec_impl *rte_services;
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0e639da..f859707 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -1094,7 +1094,7 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
 /**
  * 128-bit integer structure.
  */
-typedef struct {
+typedef struct __rte_aligned(16) {
 	union {
 		uint64_t val[2];
 #ifdef RTE_ARCH_64
@@ -1103,7 +1103,7 @@  static inline void rte_atomic64_clear(rte_atomic64_t *v)
 #endif
 #endif
 	};
-} __rte_aligned(16) rte_int128_t;
+} rte_int128_t;
 
 #ifdef __DOXYGEN__
 
diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index 1cc1222..0908aa0 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -12,6 +12,8 @@ 
  * for DPDK.
  */
 
+#include <stdalign.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -63,10 +65,19 @@ 
 #endif
 
 /**
- * Force alignment
+ * Force type alignment
+ *
+ * This macro should be used when alignment of a struct or union type
+ * is required. For toolchain compatibility it should appear between
+ * the {struct,union} keyword and tag. e.g.
+ *
+ *   struct __rte_aligned(8) tag { ... };
+ *
+ * If alignment of an object/variable is required then this macro should
+ * not be used, instead prefer C11 alignas(a).
  */
 #ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_aligned(a)
+#define __rte_aligned(a) __declspec(align(a))
 #else
 #define __rte_aligned(a) __attribute__((__aligned__(a)))
 #endif
@@ -538,18 +549,14 @@  static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
 #define RTE_CACHE_LINE_MIN_SIZE 64
 
 /** Force alignment to cache line. */
-#ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_cache_aligned
-#else
 #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
-#endif
 
 /** Force minimum cache line alignment. */
 #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
 
 #define _RTE_CACHE_GUARD_HELPER2(unique) \
-	char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] \
-	__rte_cache_aligned
+	alignas(RTE_CACHE_LINE_SIZE) \
+	char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES]
 #define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique)
 /**
  * Empty cache lines, to guard against false sharing-like effects
diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
index 1546515..aa334e8 100644
--- a/lib/eal/loongarch/include/rte_vect.h
+++ b/lib/eal/loongarch/include/rte_vect.h
@@ -15,7 +15,7 @@ 
 
 #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
 
-typedef union xmm {
+typedef union __rte_aligned(16) xmm {
 	int8_t   i8[16];
 	int16_t  i16[8];
 	int32_t  i32[4];
@@ -25,19 +25,19 @@ 
 	uint32_t u32[4];
 	uint64_t u64[2];
 	double   pd[2];
-} __rte_aligned(16) xmm_t;
+} xmm_t;
 
 #define XMM_SIZE        (sizeof(xmm_t))
 #define XMM_MASK        (XMM_SIZE - 1)
 
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
 	xmm_t	 x;
 	uint8_t	 u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 static inline xmm_t
 vect_load_128(void *p)
diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
index a5f009b..c8bace2 100644
--- a/lib/eal/ppc/include/rte_vect.h
+++ b/lib/eal/ppc/include/rte_vect.h
@@ -22,14 +22,14 @@ 
 #define	XMM_SIZE	(sizeof(xmm_t))
 #define	XMM_MASK	(XMM_SIZE - 1)
 
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
 	xmm_t    x;
 	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
 	double   pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 #ifdef __cplusplus
 }
diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index da9092a..6df10fa 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -22,14 +22,14 @@ 
 #define XMM_SIZE	(sizeof(xmm_t))
 #define XMM_MASK	(XMM_SIZE - 1)
 
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
 	xmm_t		x;
 	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
 	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
 	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
 	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
 	double		pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
 
 static inline xmm_t
 vect_load_128(void *p)
diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
index 560f9e4..a1a537e 100644
--- a/lib/eal/x86/include/rte_vect.h
+++ b/lib/eal/x86/include/rte_vect.h
@@ -91,7 +91,7 @@ 
 #define RTE_X86_ZMM_SIZE	(sizeof(__m512i))
 #define RTE_X86_ZMM_MASK	(RTE_X86_ZMM_SIZE - 1)
 
-typedef union __rte_x86_zmm {
+typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
 	__m512i	 z;
 	ymm_t    y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
 	xmm_t    x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
@@ -100,7 +100,7 @@ 
 	uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
 	uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
 	double   pd[RTE_X86_ZMM_SIZE / sizeof(double)];
-} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
+} __rte_x86_zmm_t;
 
 #endif /* __AVX512F__ */
 
diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
index 532a2e6..6d9b642 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -2,6 +2,8 @@ 
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include <stdalign.h>
+
 #include <rte_common.h>
 #include <rte_lcore.h>
 #include <rte_rtm.h>
@@ -12,10 +14,10 @@ 
 /*
  * Per-lcore structure holding current status of C0.2 sleeps.
  */
-static struct power_wait_status {
+static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status {
 	rte_spinlock_t lock;
 	volatile void *monitor_addr; /**< NULL if not currently sleeping */
-} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+} wait_status[RTE_MAX_LCORE];
 
 /*
  * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
@@ -85,10 +87,10 @@  static void amd_mwaitx(const uint64_t timeout)
 #endif
 }
 
-static struct {
+static alignas(RTE_CACHE_LINE_SIZE) struct {
 	void (*mmonitor)(volatile void *addr);
 	void (*mwait)(const uint64_t timeout);
-} __rte_cache_aligned power_monitor_ops;
+} power_monitor_ops;
 
 static inline void
 __umwait_wakeup(volatile void *addr)