On Wed, Sep 02, 2020 at 10:43:40AM +0000, Radu Nicolau wrote:
> Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler
> macros.
>
I think it's worth noting in the commit log that the set of macros provided
by the compilers are more complete than those provided by DPDK, and by not
having our own it allows new instruction sets to be leveraged without
having to do extra work to set them up in DPDK.
> Signed-off-by: Sean Morrissey <sean.morrissey@intel.com>
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> ---
> app/test/test_memcpy_perf.c | 8 ++++----
> config/x86/meson.build | 2 --
> drivers/net/enic/Makefile | 2 +-
> drivers/net/enic/meson.build | 2 +-
> drivers/net/i40e/Makefile | 2 +-
> drivers/net/i40e/meson.build | 2 +-
> drivers/net/iavf/Makefile | 2 +-
> drivers/net/iavf/meson.build | 2 +-
> drivers/net/ice/Makefile | 2 +-
> drivers/net/ice/meson.build | 2 +-
> examples/l3fwd/l3fwd_em.c | 4 ++--
> lib/librte_acl/Makefile | 2 +-
> lib/librte_acl/meson.build | 2 +-
> lib/librte_eal/common/rte_random.c | 4 ++--
> lib/librte_eal/x86/include/rte_memcpy.h | 8 ++++----
> lib/librte_efd/rte_efd_x86.h | 2 +-
> lib/librte_hash/rte_cuckoo_hash.c | 2 +-
> lib/librte_member/rte_member_ht.c | 10 +++++-----
> lib/librte_member/rte_member_x86.h | 2 +-
> lib/librte_net/rte_net_crc.c | 2 +-
> mk/rte.cpuflags.mk | 1 -
> 21 files changed, 31 insertions(+), 34 deletions(-)
>
<snip>
> @@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask)
>
> return vandq_s32(data, mask);
> }
> -#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC)
> +#elif defined(__ALTIVEC__)
Not an x86 flag. Belongs in patch 3.
> static inline xmm_t
> em_mask_key(void *key, xmm_t mask)
> {
> diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
> index f4332b044..3b591c2ed 100644
> --- a/lib/librte_acl/Makefile
> +++ b/lib/librte_acl/Makefile
> @@ -38,7 +38,7 @@ endif
> #
>
> #check if flag for AVX2 is already on, if not set it up manually
> -ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
> +ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
> CC_AVX2_SUPPORT=1
> else
> CC_AVX2_SUPPORT=\
> diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
> index d1e2c184c..b31a3f798 100644
> --- a/lib/librte_acl/meson.build
> +++ b/lib/librte_acl/meson.build
> @@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86')
> # in former case, just add avx2 C file to files list
> # in latter case, compile c file to static lib, using correct compiler
> # flags, and then have the .o file from static lib linked into main lib.
> - if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
> + if cc.get_define('__AVX2__', args: machine_args) != ''
Since this is used in a number of places, we probably should just get the
result in a variable in config/x86/meson.build.
> sources += files('acl_run_avx2.c')
> cflags += '-DCC_AVX2_SUPPORT'
> elif cc.has_argument('-mavx2')
<snip>
@@ -51,13 +51,13 @@ static size_t buf_sizes[TEST_VALUE_RANGE];
#define TEST_BATCH_SIZE 100
/* Data is aligned on this many bytes (power of 2) */
-#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+#ifdef __AVX512F__
#define ALIGNMENT_UNIT 64
-#elif defined RTE_MACHINE_CPUFLAG_AVX2
+#elif defined __AVX2__
#define ALIGNMENT_UNIT 32
-#else /* RTE_MACHINE_CPUFLAG */
+#else
#define ALIGNMENT_UNIT 16
-#endif /* RTE_MACHINE_CPUFLAG */
+#endif
/*
* Pointers used in performance tests. The two large buffers are for uncached
@@ -18,7 +18,6 @@ endif
base_flags = ['SSE', 'SSE2', 'SSE3','SSSE3', 'SSE4_1', 'SSE4_2']
foreach f:base_flags
- dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1)
compile_time_cpuflags += ['RTE_CPUFLAG_' + f]
endforeach
@@ -32,7 +31,6 @@ foreach f:optional_flags
elif f == 'RDRND'
f = 'RDRAND'
endif
- dpdk_conf.set('RTE_MACHINE_CPUFLAG_' + f, 1)
compile_time_cpuflags += ['RTE_CPUFLAG_' + f]
endif
endforeach
@@ -45,7 +45,7 @@ ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
# 'default' machine (corei7 which has no avx2) and run the binary on
# newer CPUs that have avx2.
# This part is verbatim from i40e makefile.
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
CC_AVX2_SUPPORT=1
else
CC_AVX2_SUPPORT=\
@@ -19,7 +19,7 @@ deps += ['hash']
includes += include_directories('base')
# The current implementation assumes 64-bit pointers
-if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and dpdk_conf.get('RTE_ARCH_64')
+if cc.get_define('__AVX2__', args: machine_args) != '' and dpdk_conf.get('RTE_ARCH_64')
sources += files('enic_rxtx_vec_avx2.c')
# Build the avx2 handler if the compiler supports it, even though 'machine'
# does not. This is to support users who build for the min supported machine
@@ -85,7 +85,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e_tm.c
SRCS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e_vf_representor.c
ifeq ($(CONFIG_RTE_LIBRTE_I40E_INC_VECTOR),y)
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
CC_AVX2_SUPPORT=1
else
CC_AVX2_SUPPORT=\
@@ -31,7 +31,7 @@ if arch_subdir == 'x86'
# compile AVX2 version if either:
# a. we have AVX supported in minimum instruction set baseline
# b. it's not minimum instruction set, but supported by compiler
- if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
+ if cc.get_define('__AVX2__', args: machine_args) != ''
cflags += ['-DCC_AVX2_SUPPORT']
sources += files('i40e_rxtx_vec_avx2.c')
elif cc.has_argument('-mavx2')
@@ -31,7 +31,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_IAVF_PMD) += iavf_rxtx_vec_sse.c
endif
ifeq ($(CONFIG_RTE_LIBRTE_IAVF_PMD), y)
- ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
CC_AVX2_SUPPORT=1
else
CC_AVX2_SUPPORT=\
@@ -21,7 +21,7 @@ if arch_subdir == 'x86'
# compile AVX2 version if either:
# a. we have AVX supported in minimum instruction set baseline
# b. it's not minimum instruction set, but supported by compiler
- if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
+ if cc.get_define('__AVX2__', args: machine_args) != ''
cflags += ['-DCC_AVX2_SUPPORT']
sources += files('iavf_rxtx_vec_avx2.c')
elif cc.has_argument('-mavx2')
@@ -66,7 +66,7 @@ endif
SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_switch_filter.c
SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_fdir_filter.c
SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_hash.c
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
CC_AVX2_SUPPORT=1
else
CC_AVX2_SUPPORT=\
@@ -22,7 +22,7 @@ if arch_subdir == 'x86'
# compile AVX2 version if either:
# a. we have AVX supported in minimum instruction set baseline
# b. it's not minimum instruction set, but supported by compiler
- if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
+ if cc.get_define('__AVX2__', args: machine_args) != ''
sources += files('ice_rxtx_vec_avx2.c')
elif cc.has_argument('-mavx2')
ice_avx2_lib = static_library('ice_avx2_lib',
@@ -215,7 +215,7 @@ static rte_xmm_t mask0;
static rte_xmm_t mask1;
static rte_xmm_t mask2;
-#if defined(RTE_MACHINE_CPUFLAG_SSE2)
+#if defined(__SSE2__)
static inline xmm_t
em_mask_key(void *key, xmm_t mask)
{
@@ -231,7 +231,7 @@ em_mask_key(void *key, xmm_t mask)
return vandq_s32(data, mask);
}
-#elif defined(RTE_MACHINE_CPUFLAG_ALTIVEC)
+#elif defined(__ALTIVEC__)
static inline xmm_t
em_mask_key(void *key, xmm_t mask)
{
@@ -38,7 +38,7 @@ endif
#
#check if flag for AVX2 is already on, if not set it up manually
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ifneq ($(filter $(AUTO_CPUFLAGS),__AVX2__),)
CC_AVX2_SUPPORT=1
else
CC_AVX2_SUPPORT=\
@@ -15,7 +15,7 @@ if dpdk_conf.has('RTE_ARCH_X86')
# in former case, just add avx2 C file to files list
# in latter case, compile c file to static lib, using correct compiler
# flags, and then have the .o file from static lib linked into main lib.
- if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
+ if cc.get_define('__AVX2__', args: machine_args) != ''
sources += files('acl_run_avx2.c')
cflags += '-DCC_AVX2_SUPPORT'
elif cc.has_argument('-mavx2')
@@ -2,7 +2,7 @@
* Copyright(c) 2019 Ericsson AB
*/
-#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+#ifdef __RDSEED__
#include <x86intrin.h>
#endif
#include <stdlib.h>
@@ -188,7 +188,7 @@ __rte_random_initial_seed(void)
if (ge_rc == 0)
return ge_seed;
#endif
-#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+#ifdef __RDSEED__
unsigned int rdseed_low;
unsigned int rdseed_high;
@@ -45,7 +45,7 @@ extern "C" {
static __rte_always_inline void *
rte_memcpy(void *dst, const void *src, size_t n);
-#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+#ifdef __AVX512F__
#define ALIGNMENT_MASK 0x3F
@@ -286,7 +286,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
goto COPY_BLOCK_128_BACK63;
}
-#elif defined RTE_MACHINE_CPUFLAG_AVX2
+#elif defined __AVX2__
#define ALIGNMENT_MASK 0x1F
@@ -479,7 +479,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
goto COPY_BLOCK_128_BACK31;
}
-#else /* RTE_MACHINE_CPUFLAG */
+#else /* __AVX512F__ */
#define ALIGNMENT_MASK 0x0F
@@ -803,7 +803,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
goto COPY_BLOCK_64_BACK15;
}
-#endif /* RTE_MACHINE_CPUFLAG */
+#endif /* __AVX512F__ */
static __rte_always_inline void *
rte_memcpy_aligned(void *dst, const void *src, size_t n)
@@ -19,7 +19,7 @@ efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx,
const efd_lookuptbl_t *group_lookup_table,
const uint32_t hash_val_a, const uint32_t hash_val_b)
{
-#ifdef RTE_MACHINE_CPUFLAG_AVX2
+#ifdef __AVX2__
efd_value_t value = 0;
uint32_t i = 0;
__m256i vhash_val_a = _mm256_set1_epi32(hash_val_a);
@@ -1691,7 +1691,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
/* For match mask the first bit of every two bits indicates the match */
switch (sig_cmp_fn) {
-#if defined(RTE_MACHINE_CPUFLAG_SSE2)
+#if defined(__SSE2__)
case RTE_HASH_COMPARE_SSE:
/* Compare all signatures in the bucket */
*prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
@@ -176,7 +176,7 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss,
get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(RTE_ARCH_X86) && defined(__AVX2__)
case RTE_MEMBER_COMPARE_AVX2:
if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
set_id) ||
@@ -216,7 +216,7 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
for (i = 0; i < num_keys; i++) {
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(RTE_ARCH_X86) && defined(__AVX2__)
case RTE_MEMBER_COMPARE_AVX2:
if (search_bucket_single_avx(prim_buckets[i],
tmp_sig[i], buckets, &set_id[i]) ||
@@ -253,7 +253,7 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(RTE_ARCH_X86) && defined(__AVX2__)
case RTE_MEMBER_COMPARE_AVX2:
search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
&num_matches, match_per_key, set_id);
@@ -296,7 +296,7 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
match_cnt_tmp = 0;
switch (ss->sig_cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(RTE_ARCH_X86) && defined(__AVX2__)
case RTE_MEMBER_COMPARE_AVX2:
search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
buckets, &match_cnt_tmp, match_per_key,
@@ -357,7 +357,7 @@ try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
enum rte_member_sig_compare_function cmp_fn)
{
switch (cmp_fn) {
-#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(RTE_ARCH_X86) && defined(__AVX2__)
case RTE_MEMBER_COMPARE_AVX2:
if (update_entry_search_avx(prim, sig, buckets, set_id) ||
update_entry_search_avx(sec, sig, buckets,
@@ -11,7 +11,7 @@ extern "C" {
#include <x86intrin.h>
-#if defined(RTE_MACHINE_CPUFLAG_AVX2)
+#if defined(__AVX2__)
static inline int
update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
@@ -10,7 +10,7 @@
#include <rte_common.h>
#include <rte_net_crc.h>
-#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
+#if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__)
#define X86_64_SSE42_PCLMULQDQ 1
#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL)
#define ARM64_NEON_PMULL 1
@@ -113,7 +113,6 @@ CPUFLAGS += SHA1
CPUFLAGS += SHA2
endif
-MACHINE_CFLAGS += $(addprefix -DRTE_MACHINE_CPUFLAG_,$(CPUFLAGS))
# To strip whitespace
comma:= ,