The current location used for __rte_aligned(a) for alignment of types
and variables is not compatible with MSVC. There is only a single
location accepted by both toolchains.

For variables, standard C11 offers alignas(a), which is supported by
conformant compilers, i.e. both MSVC and GCC.

For types, the standard offers no alignment facility that interoperates
compatibly between C and C++, but the same effect can be achieved by
relocating __rte_aligned(a) to the aforementioned location accepted by
all currently supported toolchains.
To allow alignment for both compilers, do the following (see the sketch
after this list):

* Expand __rte_aligned(a) to __declspec(align(a)) when building
  with MSVC.

* Move __rte_aligned from the end of {struct,union} definitions to
  between the {struct,union} keyword and the tag.

  Placing the attribute between {struct,union} and the tag imparts the
  desired alignment on the type with all of GCC, LLVM and MSVC, building
  both C and C++.

* Replace use of __rte_aligned(a) on variables/fields with alignas(a).
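
As an illustration, a minimal sketch of the two placements (the names
example_type and example_var are hypothetical and not part of this
patch):

  /* Type alignment: __rte_aligned(a) between the struct/union keyword
   * and the tag is accepted by GCC, LLVM and MSVC, in both C and C++.
   */
  struct __rte_aligned(64) example_type {
          uint64_t counter;
  };

  /* Variable/field alignment: standard C11 alignas(a) from <stdalign.h>. */
  static alignas(64) struct example_type example_var;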
Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
lib/eal/arm/include/rte_vect.h | 4 ++--
lib/eal/common/malloc_elem.h | 4 ++--
lib/eal/common/malloc_heap.h | 4 ++--
lib/eal/common/rte_keepalive.c | 3 ++-
lib/eal/common/rte_random.c | 4 ++--
lib/eal/common/rte_service.c | 8 ++++----
lib/eal/include/generic/rte_atomic.h | 4 ++--
lib/eal/include/rte_common.h | 23 +++++++++++++++--------
lib/eal/loongarch/include/rte_vect.h | 8 ++++----
lib/eal/ppc/include/rte_vect.h | 4 ++--
lib/eal/riscv/include/rte_vect.h | 4 ++--
lib/eal/x86/include/rte_vect.h | 4 ++--
lib/eal/x86/rte_power_intrinsics.c | 10 ++++++----
13 files changed, 47 insertions(+), 37 deletions(-)
On Fri, Feb 23, 2024 at 11:03:36AM -0800, Tyler Retzlaff wrote:
Just to chime in with one additional benefit of this change - it will
prevent static analysers, IDEs and doxygen [1] from ever mistaking the
__rte_aligned tag at the end of a struct definition for a variable
definition. In the absence of a macro definition from our DPDK header files,
this defines a variable called __rte_cache_aligned (as in [1]!):
struct xyz {
} __rte_cache_aligned;
while this just gives an error to let you know the definition is missing:
struct __rte_cache_aligned xyz {
};
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
[1] http://doc.dpdk.org/api/structrte__ring.html#a43d0b019eced25dc6c357f3b4f0f47e5
@@ -24,14 +24,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
@@ -20,7 +20,7 @@ enum elem_state {
ELEM_PAD /* element is a padding-only header */
};
-struct malloc_elem {
+struct __rte_cache_aligned malloc_elem {
struct malloc_heap *heap;
struct malloc_elem *volatile prev;
/**< points to prev elem in memseg */
@@ -48,7 +48,7 @@ struct malloc_elem {
size_t user_size;
uint64_t asan_cookie[2]; /* must be next to header_cookie */
#endif
-} __rte_cache_aligned;
+};
static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
@@ -21,7 +21,7 @@
/**
* Structure to hold malloc heap
*/
-struct malloc_heap {
+struct __rte_cache_aligned malloc_heap {
rte_spinlock_t lock;
LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
struct malloc_elem *volatile first;
@@ -31,7 +31,7 @@ struct malloc_heap {
unsigned int socket_id;
size_t total_size;
char name[RTE_HEAP_NAME_MAX_LEN];
-} __rte_cache_aligned;
+};
void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
@@ -2,6 +2,7 @@
* Copyright(c) 2015-2016 Intel Corporation
*/
+#include <stdalign.h>
#include <inttypes.h>
#include <rte_common.h>
@@ -19,7 +20,7 @@ struct rte_keepalive {
/*
* Each element must be cache aligned to prevent false sharing.
*/
- enum rte_keepalive_state core_state __rte_cache_aligned;
+ alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state core_state;
} live_data[RTE_KEEPALIVE_MAXCORES];
/** Last-seen-alive timestamps */
@@ -13,14 +13,14 @@
#include <rte_lcore.h>
#include <rte_random.h>
-struct rte_rand_state {
+struct __rte_cache_aligned rte_rand_state {
uint64_t z1;
uint64_t z2;
uint64_t z3;
uint64_t z4;
uint64_t z5;
RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
/* One instance each for every lcore id-equipped thread, and one
* additional instance to be shared by all others threads (i.e., all
@@ -32,7 +32,7 @@
#define RUNSTATE_RUNNING 1
/* internal representation of a service */
-struct rte_service_spec_impl {
+struct __rte_cache_aligned rte_service_spec_impl {
/* public part of the struct */
struct rte_service_spec spec;
@@ -53,7 +53,7 @@ struct rte_service_spec_impl {
* on currently.
*/
RTE_ATOMIC(uint32_t) num_mapped_cores;
-} __rte_cache_aligned;
+};
struct service_stats {
RTE_ATOMIC(uint64_t) calls;
@@ -61,7 +61,7 @@ struct service_stats {
};
/* the internal values of a service core */
-struct core_state {
+struct __rte_cache_aligned core_state {
/* map of services IDs are run on this core */
uint64_t service_mask;
RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
@@ -71,7 +71,7 @@ struct core_state {
RTE_ATOMIC(uint64_t) loops;
RTE_ATOMIC(uint64_t) cycles;
struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
-} __rte_cache_aligned;
+};
static uint32_t rte_service_count;
static struct rte_service_spec_impl *rte_services;
@@ -1094,7 +1094,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
/**
* 128-bit integer structure.
*/
-typedef struct {
+typedef struct __rte_aligned(16) {
union {
uint64_t val[2];
#ifdef RTE_ARCH_64
@@ -1103,7 +1103,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
#endif
#endif
};
-} __rte_aligned(16) rte_int128_t;
+} rte_int128_t;
#ifdef __DOXYGEN__
@@ -12,6 +12,8 @@
* for DPDK.
*/
+#include <stdalign.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -63,10 +65,19 @@
#endif
/**
- * Force alignment
+ * Force type alignment
+ *
+ * This macro should be used when alignment of a struct or union type
+ * is required. For toolchain compatibility it should appear between
+ * the {struct,union} keyword and tag. e.g.
+ *
+ * struct __rte_aligned(8) tag { ... };
+ *
+ * If alignment of an object/variable is required then this macro should
+ * not be used, instead prefer C11 alignas(a).
*/
#ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_aligned(a)
+#define __rte_aligned(a) __declspec(align(a))
#else
#define __rte_aligned(a) __attribute__((__aligned__(a)))
#endif
@@ -538,18 +549,14 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
#define RTE_CACHE_LINE_MIN_SIZE 64
/** Force alignment to cache line. */
-#ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_cache_aligned
-#else
#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
-#endif
/** Force minimum cache line alignment. */
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
#define _RTE_CACHE_GUARD_HELPER2(unique) \
- char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] \
- __rte_cache_aligned
+ alignas(RTE_CACHE_LINE_SIZE) \
+ char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES]
#define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique)
/**
* Empty cache lines, to guard against false sharing-like effects
@@ -15,7 +15,7 @@
#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
-typedef union xmm {
+typedef union __rte_aligned(16) xmm {
int8_t i8[16];
int16_t i16[8];
int32_t i32[4];
@@ -25,19 +25,19 @@
uint32_t u32[4];
uint64_t u64[2];
double pd[2];
-} __rte_aligned(16) xmm_t;
+} xmm_t;
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
@@ -22,14 +22,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#ifdef __cplusplus
}
@@ -22,14 +22,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
@@ -91,7 +91,7 @@
#define RTE_X86_ZMM_SIZE (sizeof(__m512i))
#define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1)
-typedef union __rte_x86_zmm {
+typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
__m512i z;
ymm_t y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
@@ -100,7 +100,7 @@
uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
double pd[RTE_X86_ZMM_SIZE / sizeof(double)];
-} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
+} __rte_x86_zmm_t;
#endif /* __AVX512F__ */
@@ -2,6 +2,8 @@
* Copyright(c) 2020 Intel Corporation
*/
+#include <stdalign.h>
+
#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_rtm.h>
@@ -12,10 +14,10 @@
/*
* Per-lcore structure holding current status of C0.2 sleeps.
*/
-static struct power_wait_status {
+static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status {
rte_spinlock_t lock;
volatile void *monitor_addr; /**< NULL if not currently sleeping */
-} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+} wait_status[RTE_MAX_LCORE];
/*
* This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
@@ -85,10 +87,10 @@ static void amd_mwaitx(const uint64_t timeout)
#endif
}
-static struct {
+static alignas(RTE_CACHE_LINE_SIZE) struct {
void (*mmonitor)(volatile void *addr);
void (*mwait)(const uint64_t timeout);
-} __rte_cache_aligned power_monitor_ops;
+} power_monitor_ops;
static inline void
__umwait_wakeup(volatile void *addr)