* Expand __rte_aligned(a) to __declspec(align(a)) when building
  with MSVC.

* Move __rte_aligned from the end of {struct,union} definitions to
  be between {struct,union} and tag.

Placing the attribute between {struct,union} and the tag imparts the
desired alignment on the type with all of the GCC, LLVM and MSVC
toolchains, when building both C and C++.

* Replace use of __rte_aligned(a) on variables/fields with alignas(a).

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
lib/eal/arm/include/rte_vect.h | 4 ++--
lib/eal/common/malloc_elem.h | 4 ++--
lib/eal/common/malloc_heap.h | 4 ++--
lib/eal/common/rte_keepalive.c | 3 ++-
lib/eal/common/rte_random.c | 4 ++--
lib/eal/common/rte_service.c | 8 ++++----
lib/eal/include/generic/rte_atomic.h | 4 ++--
lib/eal/include/rte_common.h | 12 +++++-------
lib/eal/loongarch/include/rte_vect.h | 8 ++++----
lib/eal/ppc/include/rte_vect.h | 4 ++--
lib/eal/riscv/include/rte_vect.h | 4 ++--
lib/eal/x86/include/rte_vect.h | 4 ++--
lib/eal/x86/rte_power_intrinsics.c | 10 ++++++----
13 files changed, 37 insertions(+), 36 deletions(-)
14/02/2024 17:35, Tyler Retzlaff:
> * Expand __rte_aligned(a) to __declspec(align(a)) when building
> with MSVC.
>
> * Move __rte_aligned from the end of {struct,union} definitions to
> be between {struct,union} and tag.
>
> Placing the attribute between {struct,union} and the tag imparts the
> desired alignment on the type with all of the GCC, LLVM and MSVC
> toolchains, when building both C and C++.
>
> * Replace use of __rte_aligned(a) on variables/fields with alignas(a).
We're missing an explanation of why __rte_aligned() and alignas()
are different.
If __rte_aligned() is only for struct definitions, maybe we should have
a different name?
At the very least we need a Doxygen comment to document its usage.
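For reference, a minimal sketch of the distinction as I understand it
(type and field names are illustrative, not from the patch):

  #include <stdalign.h>
  #include <stdint.h>
  #include <rte_common.h>

  /*
   * __rte_aligned() names an aligned *type*: every instance of
   * struct stats gets 64-byte alignment. Standard C11 alignas()
   * cannot appear in this position, which is why the wrapper macro
   * is still needed for type definitions.
   */
  struct __rte_aligned(64) stats {
          uint64_t packets;
  };

  /*
   * alignas() applies to one *object* (a variable or a field),
   * where C11 already gives a portable spelling.
   */
  struct holder {
          alignas(64) uint64_t hot_counter;
          uint64_t cold_counter;
  };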
[...]
> #ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_aligned(a)
> +#define __rte_aligned(a) __declspec(align(a))
> #else
> #define __rte_aligned(a) __attribute__((__aligned__(a)))
> #endif
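As far as I can tell the placement rule follows from the expansions
only lining up in one spot: GCC and LLVM accept __attribute__((aligned))
both before the tag and after the closing brace, but __declspec(align)
is only accepted between the keyword and the tag, e.g.:

  /* works with GCC, LLVM and MSVC after this patch: */
  struct __rte_aligned(16) good {
          int x;
  };

  /* GNU-only trailing placement; the MSVC expansion
   * __declspec(align(16)) is not accepted here:
   *
   * struct bad { int x; } __rte_aligned(16);
   */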
[...]
> /** Force alignment to cache line. */
> -#ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_cache_aligned
> -#else
> #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
> -#endif
>
> /** Force minimum cache line alignment. */
> #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
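With __rte_cache_aligned now defined the same way for every toolchain,
the two forms end up used as in this minimal sketch (hypothetical
names, mirroring the hunks below):

  #include <stdalign.h>
  #include <stdint.h>
  #include <rte_common.h>
  #include <rte_lcore.h>

  /* cache-aligned type: */
  struct __rte_cache_aligned lcore_stats {
          uint64_t cycles;
  };

  /* cache-aligned variable of an otherwise unaligned type: */
  static alignas(RTE_CACHE_LINE_SIZE) struct {
          uint64_t calls;
  } per_lcore_calls[RTE_MAX_LCORE];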
@@ -24,14 +24,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
@@ -20,7 +20,7 @@ enum elem_state {
ELEM_PAD /* element is a padding-only header */
};
-struct malloc_elem {
+struct __rte_cache_aligned malloc_elem {
struct malloc_heap *heap;
struct malloc_elem *volatile prev;
/**< points to prev elem in memseg */
@@ -48,7 +48,7 @@ struct malloc_elem {
size_t user_size;
uint64_t asan_cookie[2]; /* must be next to header_cookie */
#endif
-} __rte_cache_aligned;
+};
static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
@@ -21,7 +21,7 @@
/**
* Structure to hold malloc heap
*/
-struct malloc_heap {
+struct __rte_cache_aligned malloc_heap {
rte_spinlock_t lock;
LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
struct malloc_elem *volatile first;
@@ -31,7 +31,7 @@ struct malloc_heap {
unsigned int socket_id;
size_t total_size;
char name[RTE_HEAP_NAME_MAX_LEN];
-} __rte_cache_aligned;
+};
void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
@@ -2,6 +2,7 @@
* Copyright(c) 2015-2016 Intel Corporation
*/
+#include <stdalign.h>
#include <inttypes.h>
#include <rte_common.h>
@@ -19,7 +20,7 @@ struct rte_keepalive {
/*
* Each element must be cache aligned to prevent false sharing.
*/
- enum rte_keepalive_state core_state __rte_cache_aligned;
+ alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state core_state;
} live_data[RTE_KEEPALIVE_MAXCORES];
/** Last-seen-alive timestamps */
@@ -13,14 +13,14 @@
#include <rte_lcore.h>
#include <rte_random.h>
-struct rte_rand_state {
+struct __rte_cache_aligned rte_rand_state {
uint64_t z1;
uint64_t z2;
uint64_t z3;
uint64_t z4;
uint64_t z5;
RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
/* One instance each for every lcore id-equipped thread, and one
* additional instance to be shared by all others threads (i.e., all
@@ -32,7 +32,7 @@
#define RUNSTATE_RUNNING 1
/* internal representation of a service */
-struct rte_service_spec_impl {
+struct __rte_cache_aligned rte_service_spec_impl {
/* public part of the struct */
struct rte_service_spec spec;
@@ -53,7 +53,7 @@ struct rte_service_spec_impl {
* on currently.
*/
RTE_ATOMIC(uint32_t) num_mapped_cores;
-} __rte_cache_aligned;
+};
struct service_stats {
RTE_ATOMIC(uint64_t) calls;
@@ -61,7 +61,7 @@ struct service_stats {
};
/* the internal values of a service core */
-struct core_state {
+struct __rte_cache_aligned core_state {
/* map of services IDs are run on this core */
uint64_t service_mask;
RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
@@ -71,7 +71,7 @@ struct core_state {
RTE_ATOMIC(uint64_t) loops;
RTE_ATOMIC(uint64_t) cycles;
struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
-} __rte_cache_aligned;
+};
static uint32_t rte_service_count;
static struct rte_service_spec_impl *rte_services;
@@ -1094,7 +1094,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
/**
* 128-bit integer structure.
*/
-typedef struct {
+typedef struct __rte_aligned(16) {
union {
uint64_t val[2];
#ifdef RTE_ARCH_64
@@ -1103,7 +1103,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
#endif
#endif
};
-} __rte_aligned(16) rte_int128_t;
+} rte_int128_t;
#ifdef __DOXYGEN__
@@ -12,6 +12,8 @@
* for DPDK.
*/
+#include <stdalign.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -65,7 +67,7 @@
* Force alignment
*/
#ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_aligned(a)
+#define __rte_aligned(a) __declspec(align(a))
#else
#define __rte_aligned(a) __attribute__((__aligned__(a)))
#endif
@@ -529,18 +531,14 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
#define RTE_CACHE_LINE_MIN_SIZE 64
/** Force alignment to cache line. */
-#ifdef RTE_TOOLCHAIN_MSVC
-#define __rte_cache_aligned
-#else
#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
-#endif
/** Force minimum cache line alignment. */
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
#define _RTE_CACHE_GUARD_HELPER2(unique) \
- char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] \
- __rte_cache_aligned
+ alignas(RTE_CACHE_LINE_SIZE) \
+ char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES]
#define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique)
/**
* Empty cache lines, to guard against false sharing-like effects
@@ -15,7 +15,7 @@
#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
-typedef union xmm {
+typedef union __rte_aligned(16) xmm {
int8_t i8[16];
int16_t i16[8];
int32_t i32[4];
@@ -25,19 +25,19 @@
uint32_t u32[4];
uint64_t u64[2];
double pd[2];
-} __rte_aligned(16) xmm_t;
+} xmm_t;
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
@@ -22,14 +22,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
#ifdef __cplusplus
}
@@ -22,14 +22,14 @@
#define XMM_SIZE (sizeof(xmm_t))
#define XMM_MASK (XMM_SIZE - 1)
-typedef union rte_xmm {
+typedef union __rte_aligned(16) rte_xmm {
xmm_t x;
uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
double pd[XMM_SIZE / sizeof(double)];
-} __rte_aligned(16) rte_xmm_t;
+} rte_xmm_t;
static inline xmm_t
vect_load_128(void *p)
@@ -91,7 +91,7 @@
#define RTE_X86_ZMM_SIZE (sizeof(__m512i))
#define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1)
-typedef union __rte_x86_zmm {
+typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
__m512i z;
ymm_t y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
@@ -100,7 +100,7 @@
uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
double pd[RTE_X86_ZMM_SIZE / sizeof(double)];
-} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
+} __rte_x86_zmm_t;
#endif /* __AVX512F__ */
@@ -2,6 +2,8 @@
* Copyright(c) 2020 Intel Corporation
*/
+#include <stdalign.h>
+
#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_rtm.h>
@@ -12,10 +14,10 @@
/*
* Per-lcore structure holding current status of C0.2 sleeps.
*/
-static struct power_wait_status {
+static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status {
rte_spinlock_t lock;
volatile void *monitor_addr; /**< NULL if not currently sleeping */
-} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+} wait_status[RTE_MAX_LCORE];
/*
* This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
@@ -85,10 +87,10 @@ static void amd_mwaitx(const uint64_t timeout)
#endif
}
-static struct {
+static alignas(RTE_CACHE_LINE_SIZE) struct {
void (*mmonitor)(volatile void *addr);
void (*mwait)(const uint64_t timeout);
-} __rte_cache_aligned power_monitor_ops;
+} power_monitor_ops;
static inline void
__umwait_wakeup(volatile void *addr)