[v4,4/4] eal/atomic: add wrapper for c11 atomics

Message ID 1589270586-4480-5-git-send-email-phil.yang@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series generic rte atomic APIs deprecate proposal |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/travis-robot success Travis build: passed

Commit Message

Phil Yang May 12, 2020, 8:03 a.m. UTC
  Wraps up compiler c11 atomic built-ins with explicit memory ordering
parameter.

Signed-off-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/include/generic/rte_atomic_c11.h | 139 ++++++++++++++++++++++++
 lib/librte_eal/include/meson.build              |   1 +
 2 files changed, 140 insertions(+)
 create mode 100644 lib/librte_eal/include/generic/rte_atomic_c11.h
  

Comments

Morten Brørup May 12, 2020, 11:18 a.m. UTC | #1
> From: Phil Yang [mailto:phil.yang@arm.com]
> Sent: Tuesday, May 12, 2020 10:03 AM
> 
> Wraps up compiler c11 atomic built-ins with explicit memory ordering
> parameter.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> ---
>  lib/librte_eal/include/generic/rte_atomic_c11.h | 139
> ++++++++++++++++++++++++
>  lib/librte_eal/include/meson.build              |   1 +
>  2 files changed, 140 insertions(+)
>  create mode 100644 lib/librte_eal/include/generic/rte_atomic_c11.h
> 
> diff --git a/lib/librte_eal/include/generic/rte_atomic_c11.h
> b/lib/librte_eal/include/generic/rte_atomic_c11.h
> new file mode 100644
> index 0000000..20490f4
> --- /dev/null
> +++ b/lib/librte_eal/include/generic/rte_atomic_c11.h
> @@ -0,0 +1,139 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Arm Limited
> + */
> +
> +#ifndef _RTE_ATOMIC_C11_H_
> +#define _RTE_ATOMIC_C11_H_
> +
> +#include <rte_common.h>
> +
> +/**
> + * @file
> + * c11 atomic operations
> + *
> + * This file wraps up compiler (GCC) c11 atomic built-ins.
> + * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
> + */
> +
> +#define memory_order_relaxed __ATOMIC_RELAXED
> +#define memory_order_consume __ATOMIC_CONSUME
> +#define memory_order_acquire __ATOMIC_ACQUIRE
> +#define memory_order_release __ATOMIC_RELEASE
> +#define memory_order_acq_rel __ATOMIC_ACQ_REL
> +#define memory_order_seq_cst __ATOMIC_SEQ_CST

Why redefine these instead of using the original names?

If we need to redefine them, they should be upper case and RTE_ prefixed.

> +
> +/* Generic atomic load.
> + * It returns the contents of *PTR.
> + *
> + * The valid memory order variants are:
> + * memory_order_relaxed
> + * memory_order_consume
> + * memory_order_acquire
> + * memory_order_seq_cst
> + */
> +#define rte_atomic_load(PTR, MO)			\
> +	(__extension__ ({				\
> +		typeof(PTR) _ptr = (PTR);		\
> +		typeof(*_ptr) _ret;			\
> +		__atomic_load(_ptr, &_ret, (MO));	\
> +		_ret;					\
> +	}))
> +
> +/* Generic atomic store.
> + * It stores the value of VAL into *PTR.
> + *
> + * The valid memory order variants are:
> + * memory_order_relaxed
> + * memory_order_release
> + * memory_order_seq_cst
> + */
> +#define rte_atomic_store(PTR, VAL, MO)			\
> +	(__extension__ ({				\
> +		typeof(PTR) _ptr = (PTR);		\
> +		typeof(*_ptr) _val = (VAL);		\
> +		__atomic_store(_ptr, &_val, (MO));	\
> +	}))
> +
> +/* Generic atomic exchange.
> + * It stores the value of VAL into *PTR.
> + * It returns the original value of *PTR.
> + *
> + * The valid memory order variants are:
> + * memory_order_relaxed
> + * memory_order_acquire
> + * memory_order_release
> + * memory_order_acq_rel
> + * memory_order_seq_cst
> + */
> +#define rte_atomic_exchange(PTR, VAL, MO)			\
> +	(__extension__ ({					\
> +		typeof(PTR) _ptr = (PTR);			\
> +		typeof(*_ptr) _val = (VAL);			\
> +		typeof(*_ptr) _ret;				\
> +		__atomic_exchange(_ptr, &_val, &_ret, (MO));	\
> +		_ret;						\
> +	}))
> +
> +/* Generic atomic compare and exchange.
> + * It compares the contents of *PTR with the contents of *EXP.
> + * If equal, the operation is a read-modify-write operation that
> + * writes DES into *PTR.
> + * If they are not equal, the operation is a read and the current
> + * contents of *PTR are written into *EXP.
> + *
> + * The weak compare_exchange may fail spuriously and the strong
> + * variation will never fails spuriously.

"will never fails spuriously" -> "will never fail" / "never fails".

And I suggest that you elaborate what "fail" means here,
i.e. what exactly can happen when it fails.

> + *
> + * If DES is written into *PTR then true is returned and memory is
> + * affected according to the memory order specified by SUC_MO.
> + * There are no restrictions on what memory order can be used here.
> + *
> + * Otherwise, false is returned and memory is affected according to
> + * FAIL_MO. This memory order cannot be memory_order_release nor
> + * memory_order_acq_rel. It also cannot be a stronger order than that
> + * specified by SUC_MO.
> + */
> +#define rte_atomic_compare_exchange_weak(PTR, EXP, DES, SUC_MO,
> FAIL_MO)    \
> +	(__extension__ ({						    \
> +		typeof(PTR) _ptr = (PTR);				    \
> +		typeof(*_ptr) _des = (DES);				    \
> +		__atomic_compare_exchange(_ptr, (EXP), &_des, 1,	    \
> +				 (SUC_MO), (FAIL_MO));			    \
> +	}))
> +
> +#define rte_atomic_compare_exchange_strong(PTR, EXP, DES, SUC_MO,
> FAIL_MO)  \
> +	(__extension__ ({						    \
> +		typeof(PTR) _ptr = (PTR);				    \
> +		typeof(*_ptr) _des = (DES);				    \
> +		__atomic_compare_exchange(_ptr, (EXP), &_des, 0,	    \
> +				 (SUC_MO), (FAIL_MO));			    \
> +	}))
> +
> +#define rte_atomic_fetch_add(PTR, VAL, MO)		\
> +	__atomic_fetch_add((PTR), (VAL), (MO))
> +#define rte_atomic_fetch_sub(PTR, VAL, MO)		\
> +	__atomic_fetch_sub((PTR), (VAL), (MO))
> +#define rte_atomic_fetch_or(PTR, VAL, MO)		\
> +	__atomic_fetch_or((PTR), (VAL), (MO))
> +#define rte_atomic_fetch_xor(PTR, VAL, MO)		\
> +	__atomic_fetch_xor((PTR), (VAL), (MO))
> +#define rte_atomic_fetch_and(PTR, VAL, MO)		\
> +	__atomic_fetch_and((PTR), (VAL), (MO))
> +
> +#define rte_atomic_add_fetch(PTR, VAL, MO)		\
> +	__atomic_add_fetch((PTR), (VAL), (MO))
> +#define rte_atomic_sub_fetch(PTR, VAL, MO)		\
> +	__atomic_sub_fetch((PTR), (VAL), (MO))
> +#define rte_atomic_or_fetch(PTR, VAL, MO)		\
> +	__atomic_or_fetch((PTR), (VAL), (MO))
> +#define rte_atomic_xor_fetch(PTR, VAL, MO)		\
> +	__atomic_xor_fetch((PTR), (VAL), (MO))
> +#define rte_atomic_and_fetch(PTR, VAL, MO)		\
> +	__atomic_and_fetch((PTR), (VAL), (MO))
> +
> +/* Synchronization fence between threads based on
> + * the specified memory order.
> + */
> +#define rte_atomic_thread_fence(MO) __atomic_thread_fence((MO))
> +
> +#endif /* _RTE_ATOMIC_C11_H_ */
> diff --git a/lib/librte_eal/include/meson.build
> b/lib/librte_eal/include/meson.build
> index bc73ec2..dac1aac 100644
> --- a/lib/librte_eal/include/meson.build
> +++ b/lib/librte_eal/include/meson.build
> @@ -51,6 +51,7 @@ headers += files(
>  # special case install the generic headers, since they go in a subdir
>  generic_headers = files(
>  	'generic/rte_atomic.h',
> +	'generic/rte_atomic_c11.h',
>  	'generic/rte_byteorder.h',
>  	'generic/rte_cpuflags.h',
>  	'generic/rte_cycles.h',
> --
> 2.7.4
> 

Thumbs up for the good function documentation. :-)


Med venlig hilsen / kind regards
- Morten Brørup
  
Stephen Hemminger May 12, 2020, 6:20 p.m. UTC | #2
On Tue, May 12, 2020 at 4:03 pm, Phil Yang <phil.yang@arm.com> wrote:
> parameter.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com 
> <mailto:phil.yang@arm.com>>


What is the purpose of having rte_atomic at all?
Is this level of indirection really helping?
  
Honnappa Nagarahalli May 12, 2020, 7:23 p.m. UTC | #3
<snip>

Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics

On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com> wrote:

parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>


What is the purpose of having rte_atomic at all?
Is this level of indirection really helping? 
[HONNAPPA] (not sure why this email has html format, converted to text format)
I believe you meant, why not use the __atomic_xxx built-ins directly? The only reason for now is handling of __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to rte_smp_mb which has an optimized implementation for x86. According to Konstantin, the compiler does not generate optimal code. Wrapping that built-in alone is going to be confusing.

The wrappers also allow us to have our own implementation using inline assembly for compilers versions that do not support C11 atomic built-ins. But, I do not know if there is a need to support those versions.
  
Morten Brørup May 13, 2020, 8:57 a.m. UTC | #4
> From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
> Sent: Tuesday, May 12, 2020 9:24 PM
> 
> <snip>
> 
> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> 
> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
> wrote:
> 
> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> 
> 
> What is the purpose of having rte_atomic at all?
> Is this level of indirection really helping?
> [HONNAPPA] (not sure why this email has html format, converted to text
> format)
> I believe you meant, why not use the __atomic_xxx built-ins directly?
> The only reason for now is handling of
> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to
> rte_smp_mb which has an optimized implementation for x86. According to
> Konstantin, the compiler does not generate optimal code. Wrapping that
> built-in alone is going to be confusing.
> 
> The wrappers also allow us to have our own implementation using inline
> assembly for compilers versions that do not support C11 atomic built-
> ins. But, I do not know if there is a need to support those versions.

If I recall correctly, someone mentioned that one (or more) of the aging enterprise Linux distributions don't include a compiler with C11 atomics.

I think Stephen is onto something here...

It is silly to add wrappers like this, if the only purpose is to support compilers and distributions that don't properly support an official C standard which is nearly a decade old. The quality and quantity of the DPDK documentation for these functions (including examples, discussions on Stack Overflow, etc.) will be inferior to the documentation of the standard C11 atomics, which increases the probability of incorrect use.

And if some compiler generates code that is suboptimal for a user, then it should be the choice of the user to either accept it or use a better compiler. Using a suboptimal compiler will not only affect the user's DPDK applications, but all applications developed by the user. And if he accepts it for his other applications, he will also accept it for his DPDK applications.

We could introduce some sort of marker or standardized comment to indicate when functions only exist for backwards compatibility with ancient compilers and similar, with a reference to documentation describing why. And when the documented preconditions are no longer relevant, e.g. when those particular enterprise Linux distributions become obsolete, these functions become obsolete too, and should be removed. However, getting rid of obsolete cruft will break the ABI. In other words: Added cruft will never be removed again, so think twice before adding.


Med venlig hilsen / kind regards
- Morten Brørup
  
Phil Yang May 13, 2020, 9:40 a.m. UTC | #5
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Tuesday, May 12, 2020 7:18 PM
> To: Phil Yang <Phil.Yang@arm.com>; thomas@monjalon.net; dev@dpdk.org
> Cc: bruce.richardson@intel.com; ferruh.yigit@intel.com;
> hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; jerinj@marvell.com;
> ktraynor@redhat.com; konstantin.ananyev@intel.com;
> maxime.coquelin@redhat.com; olivier.matz@6wind.com;
> stephen@networkplumber.org; mattias.ronnblom@ericsson.com;
> harry.van.haaren@intel.com; erik.g.carrillo@intel.com; nd <nd@arm.com>;
> David Christensen <drc@linux.vnet.ibm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> 
> > From: Phil Yang [mailto:phil.yang@arm.com]
> > Sent: Tuesday, May 12, 2020 10:03 AM
> >
> > Wraps up compiler c11 atomic built-ins with explicit memory ordering
> > parameter.
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > ---
> >  lib/librte_eal/include/generic/rte_atomic_c11.h | 139
> > ++++++++++++++++++++++++
> >  lib/librte_eal/include/meson.build              |   1 +
> >  2 files changed, 140 insertions(+)
> >  create mode 100644 lib/librte_eal/include/generic/rte_atomic_c11.h
> >
> > diff --git a/lib/librte_eal/include/generic/rte_atomic_c11.h
> > b/lib/librte_eal/include/generic/rte_atomic_c11.h
> > new file mode 100644
> > index 0000000..20490f4
> > --- /dev/null
> > +++ b/lib/librte_eal/include/generic/rte_atomic_c11.h
> > @@ -0,0 +1,139 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_ATOMIC_C11_H_
> > +#define _RTE_ATOMIC_C11_H_
> > +
> > +#include <rte_common.h>
> > +
> > +/**
> > + * @file
> > + * c11 atomic operations
> > + *
> > + * This file wraps up compiler (GCC) c11 atomic built-ins.
> > + * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
> > + */
> > +
> > +#define memory_order_relaxed __ATOMIC_RELAXED
> > +#define memory_order_consume __ATOMIC_CONSUME
> > +#define memory_order_acquire __ATOMIC_ACQUIRE
> > +#define memory_order_release __ATOMIC_RELEASE
> > +#define memory_order_acq_rel __ATOMIC_ACQ_REL
> > +#define memory_order_seq_cst __ATOMIC_SEQ_CST
> 
> Why redefine these instead of using the original names?
> 
> If we need to redefine them, they should be upper case and RTE_ prefixed.

Agreed, we don't need to redefine them. I was trying to align with the stdatomic library. 
I will remove them in the next version.

> 
> > +
> > +/* Generic atomic load.
> > + * It returns the contents of *PTR.
> > + *
> > + * The valid memory order variants are:
> > + * memory_order_relaxed
> > + * memory_order_consume
> > + * memory_order_acquire
> > + * memory_order_seq_cst
> > + */
> > +#define rte_atomic_load(PTR, MO)			\
> > +	(__extension__ ({				\
> > +		typeof(PTR) _ptr = (PTR);		\
> > +		typeof(*_ptr) _ret;			\
> > +		__atomic_load(_ptr, &_ret, (MO));	\
> > +		_ret;					\
> > +	}))
> > +
> > +/* Generic atomic store.
> > + * It stores the value of VAL into *PTR.
> > + *
> > + * The valid memory order variants are:
> > + * memory_order_relaxed
> > + * memory_order_release
> > + * memory_order_seq_cst
> > + */
> > +#define rte_atomic_store(PTR, VAL, MO)			\
> > +	(__extension__ ({				\
> > +		typeof(PTR) _ptr = (PTR);		\
> > +		typeof(*_ptr) _val = (VAL);		\
> > +		__atomic_store(_ptr, &_val, (MO));	\
> > +	}))
> > +
> > +/* Generic atomic exchange.
> > + * It stores the value of VAL into *PTR.
> > + * It returns the original value of *PTR.
> > + *
> > + * The valid memory order variants are:
> > + * memory_order_relaxed
> > + * memory_order_acquire
> > + * memory_order_release
> > + * memory_order_acq_rel
> > + * memory_order_seq_cst
> > + */
> > +#define rte_atomic_exchange(PTR, VAL, MO)			\
> > +	(__extension__ ({					\
> > +		typeof(PTR) _ptr = (PTR);			\
> > +		typeof(*_ptr) _val = (VAL);			\
> > +		typeof(*_ptr) _ret;				\
> > +		__atomic_exchange(_ptr, &_val, &_ret, (MO));	\
> > +		_ret;						\
> > +	}))
> > +
> > +/* Generic atomic compare and exchange.
> > + * It compares the contents of *PTR with the contents of *EXP.
> > + * If equal, the operation is a read-modify-write operation that
> > + * writes DES into *PTR.
> > + * If they are not equal, the operation is a read and the current
> > + * contents of *PTR are written into *EXP.
> > + *
> > + * The weak compare_exchange may fail spuriously and the strong
> > + * variation will never fails spuriously.
> 
> "will never fails spuriously" -> "will never fail" / "never fails".

Thanks, I will fix it in the next version.

> 
> And I suggest that you elaborate what "fail" means here,
> i.e. what exactly can happen when it fails.

Yes. That would be better. I will update it in the new version.
Fail spuriously means the compare exchange operation acts as *PTR != *EXP and return false even if they are equal.

> 
> > + *
> > + * If DES is written into *PTR then true is returned and memory is
> > + * affected according to the memory order specified by SUC_MO.
> > + * There are no restrictions on what memory order can be used here.
> > + *
> > + * Otherwise, false is returned and memory is affected according to
> > + * FAIL_MO. This memory order cannot be memory_order_release nor
> > + * memory_order_acq_rel. It also cannot be a stronger order than that
> > + * specified by SUC_MO.
> > + */
> > +#define rte_atomic_compare_exchange_weak(PTR, EXP, DES, SUC_MO,
> > FAIL_MO)    \
> > +	(__extension__ ({						    \
> > +		typeof(PTR) _ptr = (PTR);				    \
> > +		typeof(*_ptr) _des = (DES);				    \
> > +		__atomic_compare_exchange(_ptr, (EXP), &_des, 1,	    \
> > +				 (SUC_MO), (FAIL_MO));
> 	    \
> > +	}))
> > +
> > +#define rte_atomic_compare_exchange_strong(PTR, EXP, DES, SUC_MO,
> > FAIL_MO)  \
> > +	(__extension__ ({						    \
> > +		typeof(PTR) _ptr = (PTR);				    \
> > +		typeof(*_ptr) _des = (DES);				    \
> > +		__atomic_compare_exchange(_ptr, (EXP), &_des, 0,	    \
> > +				 (SUC_MO), (FAIL_MO));
> 	    \
> > +	}))
> > +
> > +#define rte_atomic_fetch_add(PTR, VAL, MO)		\
> > +	__atomic_fetch_add((PTR), (VAL), (MO))
> > +#define rte_atomic_fetch_sub(PTR, VAL, MO)		\
> > +	__atomic_fetch_sub((PTR), (VAL), (MO))
> > +#define rte_atomic_fetch_or(PTR, VAL, MO)		\
> > +	__atomic_fetch_or((PTR), (VAL), (MO))
> > +#define rte_atomic_fetch_xor(PTR, VAL, MO)		\
> > +	__atomic_fetch_xor((PTR), (VAL), (MO))
> > +#define rte_atomic_fetch_and(PTR, VAL, MO)		\
> > +	__atomic_fetch_and((PTR), (VAL), (MO))
> > +
> > +#define rte_atomic_add_fetch(PTR, VAL, MO)		\
> > +	__atomic_add_fetch((PTR), (VAL), (MO))
> > +#define rte_atomic_sub_fetch(PTR, VAL, MO)		\
> > +	__atomic_sub_fetch((PTR), (VAL), (MO))
> > +#define rte_atomic_or_fetch(PTR, VAL, MO)		\
> > +	__atomic_or_fetch((PTR), (VAL), (MO))
> > +#define rte_atomic_xor_fetch(PTR, VAL, MO)		\
> > +	__atomic_xor_fetch((PTR), (VAL), (MO))
> > +#define rte_atomic_and_fetch(PTR, VAL, MO)		\
> > +	__atomic_and_fetch((PTR), (VAL), (MO))
> > +
> > +/* Synchronization fence between threads based on
> > + * the specified memory order.
> > + */
> > +#define rte_atomic_thread_fence(MO) __atomic_thread_fence((MO))
> > +
> > +#endif /* _RTE_ATOMIC_C11_H_ */
> > diff --git a/lib/librte_eal/include/meson.build
> > b/lib/librte_eal/include/meson.build
> > index bc73ec2..dac1aac 100644
> > --- a/lib/librte_eal/include/meson.build
> > +++ b/lib/librte_eal/include/meson.build
> > @@ -51,6 +51,7 @@ headers += files(
> >  # special case install the generic headers, since they go in a subdir
> >  generic_headers = files(
> >  	'generic/rte_atomic.h',
> > +	'generic/rte_atomic_c11.h',
> >  	'generic/rte_byteorder.h',
> >  	'generic/rte_cpuflags.h',
> >  	'generic/rte_cycles.h',
> > --
> > 2.7.4
> >
> 
> Thumbs up for the good function documentation. :-)

Thank you for your comments.

Thanks,
Phil

> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup
> 
>
  
Ananyev, Konstantin May 13, 2020, 11:53 a.m. UTC | #6
> <snip>
> 
> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> 
> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com> wrote:
> 
> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> 
> 
> What is the purpose of having rte_atomic at all?
> Is this level of indirection really helping?
> [HONNAPPA] (not sure why this email has html format, converted to text format)
> I believe you meant, why not use the __atomic_xxx built-ins directly? The only reason for now is handling of
> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to rte_smp_mb which has an optimized implementation for x86.
> According to Konstantin, the compiler does not generate optimal code. Wrapping that built-in alone is going to be confusing.
> 
> The wrappers also allow us to have our own implementation using inline assembly for compilers versions that do not support C11 atomic
> built-ins. But, I do not know if there is a need to support those versions.

Few thoughts from my side about that patch:
Yes, for __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence' which is quite expensive,
and can be a avoided for SMP case.
Though I don't see why we need to create our own wrappers  for *all*  __atomic buitins.
From my perspective it would be sufficient to just introduce few of them:
rte_thread_fence_XXX (where XXX - supported memory-orders: RELEASE, ACUIQRE, SEQ_CST, etc.).
For all other __atomic built-ins I don't see any problem to use them directly,
without introducing any wrappers around.  

As a side note, this patch implements rte_atomic_thread_fence() as a simple wrapper around
__atomic_thread_fence(), so concern mentioned above is not addressed.
  
Honnappa Nagarahalli May 13, 2020, 3:06 p.m. UTC | #7
> > <snip>
> >
> > Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> >
> > On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
> wrote:
> >
> > parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> >
> >
> > What is the purpose of having rte_atomic at all?
> > Is this level of indirection really helping?
> > [HONNAPPA] (not sure why this email has html format, converted to text
> > format) I believe you meant, why not use the __atomic_xxx built-ins
> > directly? The only reason for now is handling of
> > __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to
> rte_smp_mb which has an optimized implementation for x86.
> > According to Konstantin, the compiler does not generate optimal code.
> Wrapping that built-in alone is going to be confusing.
> >
> > The wrappers also allow us to have our own implementation using inline
> > assembly for compilers versions that do not support C11 atomic built-ins.
> But, I do not know if there is a need to support those versions.
> 
> Few thoughts from my side about that patch:
> Yes, for __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence'
> which is quite expensive, and can be a avoided for SMP case.
> Though I don't see why we need to create our own wrappers  for *all*
> __atomic buitins.
> From my perspective it would be sufficient to just introduce few of them:
> rte_thread_fence_XXX (where XXX - supported memory-orders: RELEASE,
> ACUIQRE, SEQ_CST, etc.).
> For all other __atomic built-ins I don't see any problem to use them directly,
> without introducing any wrappers around.
I am all for not doing wrappers for the sake of doing. Here, we were concerned about the uniformity of the code, hence did the wrappers for all. Does, anyone have any concerns with doing the wrappers only for __atomic_thread_fence?

Is there any possibility that the compiler will change in the future to generate the optimized code for x86?

For the API, we already have 'rte_atomic128_cmp_exchange' implemented with C11 semantics, I suggest we keep this one also on the same lines. This would require the memory order to be a parameter.

> 
> As a side note, this patch implements rte_atomic_thread_fence() as a simple
> wrapper around __atomic_thread_fence(), so concern mentioned above is not
> addressed.
Agreed. So, we will just pick the implementation of rte_smp_mb for x86 for this.
  
Honnappa Nagarahalli May 13, 2020, 3:30 p.m. UTC | #8
<snip>
> > Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> >
> > On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
> > wrote:
> >
> > parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> >
> >
> > What is the purpose of having rte_atomic at all?
> > Is this level of indirection really helping?
> > [HONNAPPA] (not sure why this email has html format, converted to text
> > format)
> > I believe you meant, why not use the __atomic_xxx built-ins directly?
> > The only reason for now is handling of
> > __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to
> > rte_smp_mb which has an optimized implementation for x86. According to
> > Konstantin, the compiler does not generate optimal code. Wrapping that
> > built-in alone is going to be confusing.
> >
> > The wrappers also allow us to have our own implementation using inline
> > assembly for compilers versions that do not support C11 atomic built-
> > ins. But, I do not know if there is a need to support those versions.
> 
> If I recall correctly, someone mentioned that one (or more) of the aging
> enterprise Linux distributions don't include a compiler with C11 atomics.
I searched through the mailing list yesterday and I could not find anyone mentioning about compilers not supporting C11 built-ins. However, the C11 atomic APIs (as defined in stdatomic.h) are supported in later versions of the compilers. So, using C11 built-ins gives us better coverage with older compilers (including the ones being used in Intel CI which were the oldest versions mentioned on the mailing list).
IMO, we should not be worried about compilers that do not support C11.

> 
> I think Stephen is onto something here...
> 
> It is silly to add wrappers like this, if the only purpose is to support compilers
> and distributions that don't properly support an official C standard which is
> nearly a decade old. The quality and quantity of the DPDK documentation for
> these functions (including examples, discussions on Stack Overflow, etc.) will
> be inferior to the documentation of the standard C11 atomics, which
> increases the probability of incorrect use.
I agree. I do not want to add them for the sake of adding them. But, I do think that we need to solve the issues in DPDK (if they affect performance) which could be due to tools. As Konstantin suggested, we could do the wrappers only for the __atomic_thread_fence built-in. This will make life lot easier.

> 
> And if some compiler generates code that is suboptimal for a user, then it
> should be the choice of the user to either accept it or use a better compiler.
> Using a suboptimal compiler will not only affect the user's DPDK applications,
> but all applications developed by the user. And if he accepts it for his other
> applications, he will also accept it for his DPDK applications.
> 
> We could introduce some sort of marker or standardized comment to indicate
> when functions only exist for backwards compatibility with ancient compilers
> and similar, with a reference to documentation describing why. And when the
> documented preconditions are no longer relevant, e.g. when those particular
> enterprise Linux distributions become obsolete, these functions become
> obsolete too, and should be removed. However, getting rid of obsolete cruft
> will break the ABI. In other words: Added cruft will never be removed again,
> so think twice before adding.
> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup
> 
>
  
Honnappa Nagarahalli May 13, 2020, 3:32 p.m. UTC | #9
<snip>

> > Subject: RE: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> >
> > > From: Phil Yang [mailto:phil.yang@arm.com]
> > > Sent: Tuesday, May 12, 2020 10:03 AM
> > >
> > > Wraps up compiler c11 atomic built-ins with explicit memory ordering
> > > parameter.
> > >
> > > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > > ---
> > >  lib/librte_eal/include/generic/rte_atomic_c11.h | 139
> > > ++++++++++++++++++++++++
> > >  lib/librte_eal/include/meson.build              |   1 +
> > >  2 files changed, 140 insertions(+)
> > >  create mode 100644 lib/librte_eal/include/generic/rte_atomic_c11.h
> > >
> > > diff --git a/lib/librte_eal/include/generic/rte_atomic_c11.h
> > > b/lib/librte_eal/include/generic/rte_atomic_c11.h
> > > new file mode 100644
> > > index 0000000..20490f4
> > > --- /dev/null
> > > +++ b/lib/librte_eal/include/generic/rte_atomic_c11.h
> > > @@ -0,0 +1,139 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2020 Arm Limited
> > > + */
> > > +
> > > +#ifndef _RTE_ATOMIC_C11_H_
> > > +#define _RTE_ATOMIC_C11_H_
> > > +
> > > +#include <rte_common.h>
> > > +
> > > +/**
> > > + * @file
> > > + * c11 atomic operations
> > > + *
> > > + * This file wraps up compiler (GCC) c11 atomic built-ins.
> > > + *
> > > +https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
> > > + */
> > > +
> > > +#define memory_order_relaxed __ATOMIC_RELAXED #define
> > > +memory_order_consume __ATOMIC_CONSUME #define
> memory_order_acquire
> > > +__ATOMIC_ACQUIRE #define memory_order_release
> __ATOMIC_RELEASE
> > > +#define memory_order_acq_rel __ATOMIC_ACQ_REL #define
> > > +memory_order_seq_cst __ATOMIC_SEQ_CST
> >
> > Why redefine these instead of using the original names?
> >
> > If we need to redefine them, they should be upper case and RTE_ prefixed.
> 
> Agreed, we don't need to redefine them. I was trying to align with the
> stdatomic library.
> I will remove them in the next version.
Agree, this will keep it inline with rte_atomic128_cmp_exchange API.

> 
> >
> > > +
> > > +/* Generic atomic load.
> > > + * It returns the contents of *PTR.
> > > + *
> > > + * The valid memory order variants are:
> > > + * memory_order_relaxed
> > > + * memory_order_consume
> > > + * memory_order_acquire
> > > + * memory_order_seq_cst
> > > + */
> > > +#define rte_atomic_load(PTR, MO)\
> > > +(__extension__ ({\
> > > +typeof(PTR) _ptr = (PTR);\
> > > +typeof(*_ptr) _ret;\
> > > +__atomic_load(_ptr, &_ret, (MO));\
> > > +_ret;\
> > > +}))
> > > +
> > > +/* Generic atomic store.
> > > + * It stores the value of VAL into *PTR.
> > > + *
> > > + * The valid memory order variants are:
> > > + * memory_order_relaxed
> > > + * memory_order_release
> > > + * memory_order_seq_cst
> > > + */
> > > +#define rte_atomic_store(PTR, VAL, MO)\ (__extension__ ({\
> > > +typeof(PTR) _ptr = (PTR);\
> > > +typeof(*_ptr) _val = (VAL);\
> > > +__atomic_store(_ptr, &_val, (MO));\
> > > +}))
> > > +
> > > +/* Generic atomic exchange.
> > > + * It stores the value of VAL into *PTR.
> > > + * It returns the original value of *PTR.
> > > + *
> > > + * The valid memory order variants are:
> > > + * memory_order_relaxed
> > > + * memory_order_acquire
> > > + * memory_order_release
> > > + * memory_order_acq_rel
> > > + * memory_order_seq_cst
> > > + */
> > > +#define rte_atomic_exchange(PTR, VAL, MO)\ (__extension__ ({\
> > > +typeof(PTR) _ptr = (PTR);\
> > > +typeof(*_ptr) _val = (VAL);\
> > > +typeof(*_ptr) _ret;\
> > > +__atomic_exchange(_ptr, &_val, &_ret, (MO));\ _ret;\
> > > +}))
> > > +
> > > +/* Generic atomic compare and exchange.
> > > + * It compares the contents of *PTR with the contents of *EXP.
> > > + * If equal, the operation is a read-modify-write operation that
> > > + * writes DES into *PTR.
> > > + * If they are not equal, the operation is a read and the current
> > > + * contents of *PTR are written into *EXP.
> > > + *
> > > + * The weak compare_exchange may fail spuriously and the strong
> > > + * variation will never fails spuriously.
> >
> > "will never fails spuriously" -> "will never fail" / "never fails".
> 
> Thanks, I will fix it in the next version.
> 
> >
> > And I suggest that you elaborate what "fail" means here, i.e. what
> > exactly can happen when it fails.
> 
> Yes. That would be better. I will update it in the new version.
> Fail spuriously means the compare exchange operation acts as *PTR != *EXP
> and return false even if they are equal.
> 
> >
> > > + *
> > > + * If DES is written into *PTR then true is returned and memory is
> > > + * affected according to the memory order specified by SUC_MO.
> > > + * There are no restrictions on what memory order can be used here.
> > > + *
> > > + * Otherwise, false is returned and memory is affected according to
> > > + * FAIL_MO. This memory order cannot be memory_order_release nor
> > > + * memory_order_acq_rel. It also cannot be a stronger order than
> > > +that
> > > + * specified by SUC_MO.
> > > + */
> > > +#define rte_atomic_compare_exchange_weak(PTR, EXP, DES, SUC_MO,
> > > FAIL_MO)    \
> > > +(__extension__ ({    \
> > > +typeof(PTR) _ptr = (PTR);    \
> > > +typeof(*_ptr) _des = (DES);    \
> > > +__atomic_compare_exchange(_ptr, (EXP), &_des, 1,    \
> > > + (SUC_MO), (FAIL_MO));
> >     \
> > > +}))
> > > +
> > > +#define rte_atomic_compare_exchange_strong(PTR, EXP, DES, SUC_MO,
> > > FAIL_MO)  \
> > > +(__extension__ ({    \
> > > +typeof(PTR) _ptr = (PTR);    \
> > > +typeof(*_ptr) _des = (DES);    \
> > > +__atomic_compare_exchange(_ptr, (EXP), &_des, 0,    \
> > > + (SUC_MO), (FAIL_MO));
> >     \
> > > +}))
> > > +
> > > +#define rte_atomic_fetch_add(PTR, VAL, MO)\
> > > +__atomic_fetch_add((PTR), (VAL), (MO)) #define
> > > +rte_atomic_fetch_sub(PTR, VAL, MO)\ __atomic_fetch_sub((PTR),
> > > +(VAL), (MO)) #define rte_atomic_fetch_or(PTR, VAL, MO)\
> > > +__atomic_fetch_or((PTR), (VAL), (MO)) #define
> > > +rte_atomic_fetch_xor(PTR, VAL, MO)\ __atomic_fetch_xor((PTR),
> > > +(VAL), (MO)) #define rte_atomic_fetch_and(PTR, VAL, MO)\
> > > +__atomic_fetch_and((PTR), (VAL), (MO))
> > > +
> > > +#define rte_atomic_add_fetch(PTR, VAL, MO)\
> > > +__atomic_add_fetch((PTR), (VAL), (MO)) #define
> > > +rte_atomic_sub_fetch(PTR, VAL, MO)\ __atomic_sub_fetch((PTR),
> > > +(VAL), (MO)) #define rte_atomic_or_fetch(PTR, VAL, MO)\
> > > +__atomic_or_fetch((PTR), (VAL), (MO)) #define
> > > +rte_atomic_xor_fetch(PTR, VAL, MO)\ __atomic_xor_fetch((PTR),
> > > +(VAL), (MO)) #define rte_atomic_and_fetch(PTR, VAL, MO)\
> > > +__atomic_and_fetch((PTR), (VAL), (MO))
> > > +
> > > +/* Synchronization fence between threads based on
> > > + * the specified memory order.
> > > + */
> > > +#define rte_atomic_thread_fence(MO) __atomic_thread_fence((MO))
> > > +
> > > +#endif /* _RTE_ATOMIC_C11_H_ */
> > > diff --git a/lib/librte_eal/include/meson.build
> > > b/lib/librte_eal/include/meson.build
> > > index bc73ec2..dac1aac 100644
> > > --- a/lib/librte_eal/include/meson.build
> > > +++ b/lib/librte_eal/include/meson.build
> > > @@ -51,6 +51,7 @@ headers += files(
> > >  # special case install the generic headers, since they go in a
> > > subdir  generic_headers = files(  'generic/rte_atomic.h',
> > > +'generic/rte_atomic_c11.h',
> > >  'generic/rte_byteorder.h',
> > >  'generic/rte_cpuflags.h',
> > >  'generic/rte_cycles.h',
> > > --
> > > 2.7.4
> > >
> >
> > Thumbs up for the good function documentation. :-)
> 
> Thank you for your comments.
> 
> Thanks,
> Phil
> 
> >
> >
> > Med venlig hilsen / kind regards
> > - Morten Brørup
> >
> >
>
  
Mattias Rönnblom May 13, 2020, 7:04 p.m. UTC | #10
On 2020-05-13 10:57, Morten Brørup wrote:
>> From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
>> Sent: Tuesday, May 12, 2020 9:24 PM
>>
>> <snip>
>>
>> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
>>
>> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
>> wrote:
>>
>> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
>>
>>
>> What is the purpose of having rte_atomic at all?
>> Is this level of indirection really helping?
>> [HONNAPPA] (not sure why this email has html format, converted to text
>> format)
>> I believe you meant, why not use the __atomic_xxx built-ins directly?
>> The only reason for now is handling of
>> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent to
>> rte_smp_mb which has an optimized implementation for x86. According to
>> Konstantin, the compiler does not generate optimal code. Wrapping that
>> built-in alone is going to be confusing.
>>
>> The wrappers also allow us to have our own implementation using inline
>> assembly for compilers versions that do not support C11 atomic built-
>> ins. But, I do not know if there is a need to support those versions.
> If I recall correctly, someone mentioned that one (or more) of the aging enterprise Linux distributions don't include a compiler with C11 atomics.
>
> I think Stephen is onto something here...
>
> It is silly to add wrappers like this, if the only purpose is to support compilers and distributions that don't properly support an official C standard which is nearly a decade old. The quality and quantity of the DPDK documentation for these functions (including examples, discussions on Stack Overflow, etc.) will be inferior to the documentation of the standard C11 atomics, which increases the probability of incorrect use.


What's being used in DPDK today, and what's being wrapped here, is not 
standard C11 atomics - it's a bunch of GCC built-ins. Nothing in the __ 
namespace is in the standard. It's reserved for the implementation (e.g. 
compiler).


> And if some compiler generates code that is suboptimal for a user, then it should be the choice of the user to either accept it or use a better compiler. Using a suboptimal compiler will not only affect the user's DPDK applications, but all applications developed by the user. And if he accepts it for his other applications, he will also accept it for his DPDK applications.
>
> We could introduce some sort of marker or standardized comment to indicate when functions only exist for backwards compatibility with ancient compilers and similar, with a reference to documentation describing why. And when the documented preconditions are no longer relevant, e.g. when those particular enterprise Linux distributions become obsolete, these functions become obsolete too, and should be removed. However, getting rid of obsolete cruft will break the ABI. In other words: Added cruft will never be removed again, so think twice before adding.
>
>
> Med venlig hilsen / kind regards
> - Morten Brørup
>
>
>
  
Mattias Rönnblom May 13, 2020, 7:25 p.m. UTC | #11
On 2020-05-12 20:20, Stephen Hemminger wrote:
> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <phil.yang@arm.com> wrote:
>> parameter. Signed-off-by: Phil Yang <phil.yang@arm.com 
>> <mailto:phil.yang@arm.com>>
>
>
> What is the purpose of having rte_atomic at all?
> Is this level of indirection really helping?
>

To allow a different implementation than the GCC built-ins, for certain 
functions and architectures. To allow extensions (i.e. atomic functions 
that aren't GCC built-ins) in a clean way. To avoid using GCC built-ins 
directly, both for cosmetic reasons, and that it might cause problem for 
future compilers.
  
Honnappa Nagarahalli May 13, 2020, 7:40 p.m. UTC | #12
<snip>

> >>
> >> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> >>
> >> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
> >> wrote:
> >>
> >> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> >>
> >>
> >> What is the purpose of having rte_atomic at all?
> >> Is this level of indirection really helping?
> >> [HONNAPPA] (not sure why this email has html format, converted to
> >> text
> >> format)
> >> I believe you meant, why not use the __atomic_xxx built-ins directly?
> >> The only reason for now is handling of
> >> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent
> >> to rte_smp_mb which has an optimized implementation for x86.
> >> According to Konstantin, the compiler does not generate optimal code.
> >> Wrapping that built-in alone is going to be confusing.
> >>
> >> The wrappers also allow us to have our own implementation using
> >> inline assembly for compilers versions that do not support C11 atomic
> >> built- ins. But, I do not know if there is a need to support those versions.
> > If I recall correctly, someone mentioned that one (or more) of the aging
> enterprise Linux distributions don't include a compiler with C11 atomics.
> >
> > I think Stephen is onto something here...
> >
> > It is silly to add wrappers like this, if the only purpose is to support
> compilers and distributions that don't properly support an official C standard
> which is nearly a decade old. The quality and quantity of the DPDK
> documentation for these functions (including examples, discussions on Stack
> Overflow, etc.) will be inferior to the documentation of the standard C11
> atomics, which increases the probability of incorrect use.
> 
> 
> What's being used in DPDK today, and what's being wrapped here, is not
> standard C11 atomics - it's a bunch of GCC built-ins. Nothing in the __
> namespace is in the standard. It's reserved for the implementation (e.g.
> compiler).
I have tried to understand what it mean by 'built-ins', but I have not got a good answer. So, does it mean that the built-in function (same symbol and API interface) may not be available in another C compiler? IMO, this is what matters for DPDK.
Currently, the same built-in functions are available in GCC and Clang.

> 
> 
> > And if some compiler generates code that is suboptimal for a user, then it
> should be the choice of the user to either accept it or use a better compiler.
> Using a suboptimal compiler will not only affect the user's DPDK applications,
> but all applications developed by the user. And if he accepts it for his other
> applications, he will also accept it for his DPDK applications.
> >
> > We could introduce some sort of marker or standardized comment to
> indicate when functions only exist for backwards compatibility with ancient
> compilers and similar, with a reference to documentation describing why. And
> when the documented preconditions are no longer relevant, e.g. when those
> particular enterprise Linux distributions become obsolete, these functions
> become obsolete too, and should be removed. However, getting rid of
> obsolete cruft will break the ABI. In other words: Added cruft will never be
> removed again, so think twice before adding.
> >
> >
> > Med venlig hilsen / kind regards
> > - Morten Brørup
> >
> >
> >
  
Mattias Rönnblom May 13, 2020, 8:17 p.m. UTC | #13
On 2020-05-13 21:40, Honnappa Nagarahalli wrote:
> <snip>
>
>>>> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
>>>>
>>>> On Tue, May 12, 2020 at 4:03 pm, Phil Yang <mailto:phil.yang@arm.com>
>>>> wrote:
>>>>
>>>> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
>>>>
>>>>
>>>> What is the purpose of having rte_atomic at all?
>>>> Is this level of indirection really helping?
>>>> [HONNAPPA] (not sure why this email has html format, converted to
>>>> text
>>>> format)
>>>> I believe you meant, why not use the __atomic_xxx built-ins directly?
>>>> The only reason for now is handling of
>>>> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is equivalent
>>>> to rte_smp_mb which has an optimized implementation for x86.
>>>> According to Konstantin, the compiler does not generate optimal code.
>>>> Wrapping that built-in alone is going to be confusing.
>>>>
>>>> The wrappers also allow us to have our own implementation using
>>>> inline assembly for compilers versions that do not support C11 atomic
>>>> built- ins. But, I do not know if there is a need to support those versions.
>>> If I recall correctly, someone mentioned that one (or more) of the aging
>> enterprise Linux distributions don't include a compiler with C11 atomics.
>>> I think Stephen is onto something here...
>>>
>>> It is silly to add wrappers like this, if the only purpose is to support
>> compilers and distributions that don't properly support an official C standard
>> which is nearly a decade old. The quality and quantity of the DPDK
>> documentation for these functions (including examples, discussions on Stack
>> Overflow, etc.) will be inferior to the documentation of the standard C11
>> atomics, which increases the probability of incorrect use.
>>
>>
>> What's being used in DPDK today, and what's being wrapped here, is not
>> standard C11 atomics - it's a bunch of GCC built-ins. Nothing in the __
>> namespace is in the standard. It's reserved for the implementation (e.g.
>> compiler).
> I have tried to understand what it mean by 'built-ins', but I have not got a good answer. So, does it mean that the built-in function (same symbol and API interface) may not be available in another C compiler? IMO, this is what matters for DPDK.
> Currently, the same built-in functions are available in GCC and Clang.


 From what I understand, "built-ins" is GCC terminology for 
non-standard, implementation-specific intrinsic functions, built into 
the compiler. They all reside in the __* namespace.


Since GCC is the industry standard, other compilers are likely to 
follow, including built-in functions.

>>
>>> And if some compiler generates code that is suboptimal for a user, then it
>> should be the choice of the user to either accept it or use a better compiler.
>> Using a suboptimal compiler will not only affect the user's DPDK applications,
>> but all applications developed by the user. And if he accepts it for his other
>> applications, he will also accept it for his DPDK applications.
>>> We could introduce some sort of marker or standardized comment to
>> indicate when functions only exist for backwards compatibility with ancient
>> compilers and similar, with a reference to documentation describing why. And
>> when the documented preconditions are no longer relevant, e.g. when those
>> particular enterprise Linux distributions become obsolete, these functions
>> become obsolete too, and should be removed. However, getting rid of
>> obsolete cruft will break the ABI. In other words: Added cruft will never be
>> removed again, so think twice before adding.
>>>
>>> Med venlig hilsen / kind regards
>>> - Morten Brørup
>>>
>>>
>>>
  
Morten Brørup May 14, 2020, 8:34 a.m. UTC | #14
+ Added people from the related discussion regarding the ARM roadmap [https://mails.dpdk.org/archives/dev/2020-April/162580.html].

> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Wednesday, May 13, 2020 10:17 PM
> 
> On 2020-05-13 21:40, Honnappa Nagarahalli wrote:
> > <snip>
> >
> >>>> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11
> atomics
> >>>>
> >>>> On Tue, May 12, 2020 at 4:03 pm, Phil Yang
> <mailto:phil.yang@arm.com>
> >>>> wrote:
> >>>>
> >>>> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> >>>>
> >>>>
> >>>> What is the purpose of having rte_atomic at all?
> >>>> Is this level of indirection really helping?
> >>>> [HONNAPPA] (not sure why this email has html format, converted to
> >>>> text
> >>>> format)
> >>>> I believe you meant, why not use the __atomic_xxx built-ins
> directly?
> >>>> The only reason for now is handling of
> >>>> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is
> equivalent
> >>>> to rte_smp_mb which has an optimized implementation for x86.
> >>>> According to Konstantin, the compiler does not generate optimal
> code.
> >>>> Wrapping that built-in alone is going to be confusing.
> >>>>
> >>>> The wrappers also allow us to have our own implementation using
> >>>> inline assembly for compilers versions that do not support C11
> atomic
> >>>> built- ins. But, I do not know if there is a need to support those
> versions.
> >>> If I recall correctly, someone mentioned that one (or more) of the
> aging
> >> enterprise Linux distributions don't include a compiler with C11
> atomics.
> >>> I think Stephen is onto something here...
> >>>
> >>> It is silly to add wrappers like this, if the only purpose is to
> support
> >> compilers and distributions that don't properly support an official
> C standard
> >> which is nearly a decade old. The quality and quantity of the DPDK
> >> documentation for these functions (including examples, discussions
> on Stack
> >> Overflow, etc.) will be inferior to the documentation of the
> standard C11
> >> atomics, which increases the probability of incorrect use.
> >>
> >>
> >> What's being used in DPDK today, and what's being wrapped here, is
> not
> >> standard C11 atomics - it's a bunch of GCC built-ins. Nothing in the
> __
> >> namespace is in the standard. It's reserved for the implementation
> (e.g.
> >> compiler).
> > I have tried to understand what it mean by 'built-ins', but I have
> not got a good answer. So, does it mean that the built-in function
> (same symbol and API interface) may not be available in another C
> compiler? IMO, this is what matters for DPDK.
> > Currently, the same built-in functions are available in GCC and
> Clang.
> 
> 
>  From what I understand, "built-ins" is GCC terminology for
> non-standard, implementation-specific intrinsic functions, built into
> the compiler. They all reside in the __* namespace.
> 
> 
> Since GCC is the industry standard, other compilers are likely to
> follow, including built-in functions.
> 

Timeline:

December 2011: The C11 standard was published [http://www.open-std.org/jtc1/sc22/wg14/www/standards.html].

March 2012: GCC 4.7 was released, introducing the __atomic built-ins [https://gcc.gnu.org/gcc-4.7/changes.html, https://www.gnu.org/software/gcc/gcc-4.7/].

March 2013: GCC 4.8 was released [https://www.gnu.org/software/gcc/gcc-4.8/].

April 2014: GCC 4.9 was released, introducing C11 atomics (incl. <stdatomic.h>) [https://gcc.gnu.org/gcc-4.9/changes.html, https://www.gnu.org/software/gcc/gcc-4.9/].

June 2014: RHEL7 was released [https://access.redhat.com/articles/3078]. (RHEL7 Beta was released in December 2013, which probably explains why the GA release doesn’t include GCC 4.9.)

May 2019 (i.e. one year ago): RHEL8 was released [https://access.redhat.com/articles/3078].


RHEL7 includes GCC 4.8 only [https://access.redhat.com/solutions/19458], and apparently RHEL7 has not been updated to GCC 4.9 with any of its minor releases.

Should the DPDK project be stuck on "industry standard" GCC atomics, unable to use the decade old "official standard" C11 atomics, only because we want to support a six year old enterprise Linux distribution? Red Hat released a new enterprise version a year ago... perhaps it's time for their customers to upgrade, if they want to use the latest and greatest version of DPDK.

Are all the other tools required for building DPDK (in the required versions) included in RHEL7, or do we require developers to install/upgrade any other tools anyway? If so, why not also GCC? DPDK can be used in a cross compilation environment, so we are not requiring RHEL7 users to replace their GCC 4.7 default compiler.


Furthermore, the DPDK Documentation specifies GCC 4.9+ as a system requirement [https://doc.dpdk.org/guides/linux_gsg/sys_reqs.html#compilation-of-the-dpdk]. If we are stuck on GCC 4.8, the documentation should be updated.


> >>
> >>> And if some compiler generates code that is suboptimal for a user,
> then it
> >> should be the choice of the user to either accept it or use a better
> compiler.
> >> Using a suboptimal compiler will not only affect the user's DPDK
> applications,
> >> but all applications developed by the user. And if he accepts it for
> his other
> >> applications, he will also accept it for his DPDK applications.
> >>> We could introduce some sort of marker or standardized comment to
> >> indicate when functions only exist for backwards compatibility with
> ancient
> >> compilers and similar, with a reference to documentation describing
> why. And
> >> when the documented preconditions are no longer relevant, e.g. when
> those
> >> particular enterprise Linux distributions become obsolete, these
> functions
> >> become obsolete too, and should be removed. However, getting rid of
> >> obsolete cruft will break the ABI. In other words: Added cruft will
> never be
> >> removed again, so think twice before adding.
  
Mattias Rönnblom May 14, 2020, 8:16 p.m. UTC | #15
On 2020-05-14 10:34, Morten Brørup wrote:
> + Added people from the related discussion regarding the ARM roadmap [https://protect2.fireeye.com/v1/url?k=10efdd7b-4e4f1ed2-10ef9de0-86959e472243-b772fef31e4ae6af&q=1&e=e3b0051e-bb23-4a30-84c7-7e5e80f83325&u=https%3A%2F%2Fmails.dpdk.org%2Farchives%2Fdev%2F2020-April%2F162580.html].
>
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Wednesday, May 13, 2020 10:17 PM
>>
>> On 2020-05-13 21:40, Honnappa Nagarahalli wrote:
>>> <snip>
>>>
>>>>>> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11
>> atomics
>>>>>> On Tue, May 12, 2020 at 4:03 pm, Phil Yang
>> <mailto:phil.yang@arm.com>
>>>>>> wrote:
>>>>>>
>>>>>> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
>>>>>>
>>>>>>
>>>>>> What is the purpose of having rte_atomic at all?
>>>>>> Is this level of indirection really helping?
>>>>>> [HONNAPPA] (not sure why this email has html format, converted to
>>>>>> text
>>>>>> format)
>>>>>> I believe you meant, why not use the __atomic_xxx built-ins
>> directly?
>>>>>> The only reason for now is handling of
>>>>>> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is
>> equivalent
>>>>>> to rte_smp_mb which has an optimized implementation for x86.
>>>>>> According to Konstantin, the compiler does not generate optimal
>> code.
>>>>>> Wrapping that built-in alone is going to be confusing.
>>>>>>
>>>>>> The wrappers also allow us to have our own implementation using
>>>>>> inline assembly for compilers versions that do not support C11
>> atomic
>>>>>> built- ins. But, I do not know if there is a need to support those
>> versions.
>>>>> If I recall correctly, someone mentioned that one (or more) of the
>> aging
>>>> enterprise Linux distributions don't include a compiler with C11
>> atomics.
>>>>> I think Stephen is onto something here...
>>>>>
>>>>> It is silly to add wrappers like this, if the only purpose is to
>> support
>>>> compilers and distributions that don't properly support an official
>> C standard
>>>> which is nearly a decade old. The quality and quantity of the DPDK
>>>> documentation for these functions (including examples, discussions
>> on Stack
>>>> Overflow, etc.) will be inferior to the documentation of the
>> standard C11
>>>> atomics, which increases the probability of incorrect use.
>>>>
>>>>
>>>> What's being used in DPDK today, and what's being wrapped here, is
>> not
>>>> standard C11 atomics - it's a bunch of GCC built-ins. Nothing in the
>> __
>>>> namespace is in the standard. It's reserved for the implementation
>> (e.g.
>>>> compiler).
>>> I have tried to understand what it mean by 'built-ins', but I have
>> not got a good answer. So, does it mean that the built-in function
>> (same symbol and API interface) may not be available in another C
>> compiler? IMO, this is what matters for DPDK.
>>> Currently, the same built-in functions are available in GCC and
>> Clang.
>>
>>
>>   From what I understand, "built-ins" is GCC terminology for
>> non-standard, implementation-specific intrinsic functions, built into
>> the compiler. They all reside in the __* namespace.
>>
>>
>> Since GCC is the industry standard, other compilers are likely to
>> follow, including built-in functions.
>>
> Timeline:
>
> December 2011: The C11 standard was published [https://protect2.fireeye.com/v1/url?k=8e23b012-d08373bb-8e23f089-86959e472243-a2babe7075f8ac38&q=1&e=e3b0051e-bb23-4a30-84c7-7e5e80f83325&u=http%3A%2F%2Fwww.open-std.org%2Fjtc1%2Fsc22%2Fwg14%2Fwww%2Fstandards.html].
>
> March 2012: GCC 4.7 was released, introducing the __atomic built-ins [https://gcc.gnu.org/gcc-4.7/changes.html, https://www.gnu.org/software/gcc/gcc-4.7/].
>
> March 2013: GCC 4.8 was released [https://www.gnu.org/software/gcc/gcc-4.8/].
>
> April 2014: GCC 4.9 was released, introducing C11 atomics (incl. <stdatomic.h>) [https://gcc.gnu.org/gcc-4.9/changes.html, https://www.gnu.org/software/gcc/gcc-4.9/].
>
> June 2014: RHEL7 was released [https://access.redhat.com/articles/3078]. (RHEL7 Beta was released in December 2013, which probably explains why the GA release doesn’t include GCC 4.9.)
>
> May 2019 (i.e. one year ago): RHEL8 was released [https://access.redhat.com/articles/3078].
>
>
> RHEL7 includes GCC 4.8 only [https://access.redhat.com/solutions/19458], and apparently RHEL7 has not been updated to GCC 4.9 with any of its minor releases.
>
> Should the DPDK project be stuck on "industry standard" GCC atomics, unable to use the decade old "official standard" C11 atomics, only because we want to support a six year old enterprise Linux distribution? Red Hat released a new enterprise version a year ago... perhaps it's time for their customers to upgrade, if they want to use the latest and greatest version of DPDK.


Just to be clear - I wasn't arguing for the direct use of GCC built-ins.


The GCC __atomic built-ins (called directly, or via a DPDK wrapper) do 
have some advantages over C11 atomics. One is that GCC supports 128-bit 
atomic operations, on certain architectures. <rte_atomic.h> already has 
a 128-bit compare-exchange. Also, since the GCC built-ins seem not to 
bother with architectures where atomics would be implemented by means of 
a lock, they are a little easier to use than <stdatomic.h>.


> Are all the other tools required for building DPDK (in the required versions) included in RHEL7, or do we require developers to install/upgrade any other tools anyway? If so, why not also GCC? DPDK can be used in a cross compilation environment, so we are not requiring RHEL7 users to replace their GCC 4.7 default compiler.
>
>
> Furthermore, the DPDK Documentation specifies GCC 4.9+ as a system requirement [https://protect2.fireeye.com/v1/url?k=339bad56-6d3b6eff-339bedcd-86959e472243-cb1bf3934c202e3f&q=1&e=e3b0051e-bb23-4a30-84c7-7e5e80f83325&u=https%3A%2F%2Fdoc.dpdk.org%2Fguides%2Flinux_gsg%2Fsys_reqs.html%23compilation-of-the-dpdk]. If we are stuck on GCC 4.8, the documentation should be updated.
>
>
>>>>> And if some compiler generates code that is suboptimal for a user,
>> then it
>>>> should be the choice of the user to either accept it or use a better
>> compiler.
>>>> Using a suboptimal compiler will not only affect the user's DPDK
>> applications,
>>>> but all applications developed by the user. And if he accepts it for
>> his other
>>>> applications, he will also accept it for his DPDK applications.
>>>>> We could introduce some sort of marker or standardized comment to
>>>> indicate when functions only exist for backwards compatibility with
>> ancient
>>>> compilers and similar, with a reference to documentation describing
>> why. And
>>>> when the documented preconditions are no longer relevant, e.g. when
>> those
>>>> particular enterprise Linux distributions become obsolete, these
>> functions
>>>> become obsolete too, and should be removed. However, getting rid of
>>>> obsolete cruft will break the ABI. In other words: Added cruft will
>> never be
>>>> removed again, so think twice before adding.
  
Honnappa Nagarahalli May 14, 2020, 9 p.m. UTC | #16
<snip>

> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11 atomics
> 
> On 2020-05-14 10:34, Morten Brørup wrote:
> > + Added people from the related discussion regarding the ARM roadmap
> [https://protect2.fireeye.com/v1/url?k=10efdd7b-4e4f1ed2-10ef9de0-
> 86959e472243-b772fef31e4ae6af&q=1&e=e3b0051e-bb23-4a30-84c7-
> 7e5e80f83325&u=https%3A%2F%2Fmails.dpdk.org%2Farchives%2Fdev%2F20
> 20-April%2F162580.html].
> >
> >> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> >> Sent: Wednesday, May 13, 2020 10:17 PM
> >>
> >> On 2020-05-13 21:40, Honnappa Nagarahalli wrote:
> >>> <snip>
> >>>
> >>>>>> Subject: Re: [PATCH v4 4/4] eal/atomic: add wrapper for c11
> >> atomics
> >>>>>> On Tue, May 12, 2020 at 4:03 pm, Phil Yang
> >> <mailto:phil.yang@arm.com>
> >>>>>> wrote:
> >>>>>>
> >>>>>> parameter. Signed-off-by: Phil Yang <mailto:phil.yang@arm.com>
> >>>>>>
> >>>>>>
> >>>>>> What is the purpose of having rte_atomic at all?
> >>>>>> Is this level of indirection really helping?
> >>>>>> [HONNAPPA] (not sure why this email has html format, converted to
> >>>>>> text
> >>>>>> format)
> >>>>>> I believe you meant, why not use the __atomic_xxx built-ins
> >> directly?
> >>>>>> The only reason for now is handling of
> >>>>>> __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is
> >> equivalent
> >>>>>> to rte_smp_mb which has an optimized implementation for x86.
> >>>>>> According to Konstantin, the compiler does not generate optimal
> >> code.
> >>>>>> Wrapping that built-in alone is going to be confusing.
> >>>>>>
> >>>>>> The wrappers also allow us to have our own implementation using
> >>>>>> inline assembly for compilers versions that do not support C11
> >> atomic
> >>>>>> built- ins. But, I do not know if there is a need to support
> >>>>>> those
> >> versions.
> >>>>> If I recall correctly, someone mentioned that one (or more) of the
> >> aging
> >>>> enterprise Linux distributions don't include a compiler with C11
> >> atomics.
> >>>>> I think Stephen is onto something here...
> >>>>>
> >>>>> It is silly to add wrappers like this, if the only purpose is to
> >> support
> >>>> compilers and distributions that don't properly support an official
> >> C standard
> >>>> which is nearly a decade old. The quality and quantity of the DPDK
> >>>> documentation for these functions (including examples, discussions
> >> on Stack
> >>>> Overflow, etc.) will be inferior to the documentation of the
> >> standard C11
> >>>> atomics, which increases the probability of incorrect use.
> >>>>
> >>>>
> >>>> What's being used in DPDK today, and what's being wrapped here, is
> >> not
> >>>> standard C11 atomics - it's a bunch of GCC built-ins. Nothing in
> >>>> the
> >> __
> >>>> namespace is in the standard. It's reserved for the implementation
> >> (e.g.
> >>>> compiler).
> >>> I have tried to understand what it mean by 'built-ins', but I have
> >> not got a good answer. So, does it mean that the built-in function
> >> (same symbol and API interface) may not be available in another C
> >> compiler? IMO, this is what matters for DPDK.
> >>> Currently, the same built-in functions are available in GCC and
> >> Clang.
> >>
> >>
> >>   From what I understand, "built-ins" is GCC terminology for
> >> non-standard, implementation-specific intrinsic functions, built into
> >> the compiler. They all reside in the __* namespace.
> >>
> >>
> >> Since GCC is the industry standard, other compilers are likely to
> >> follow, including built-in functions.
> >>
> > Timeline:
> >
> > December 2011: The C11 standard was published
> [https://protect2.fireeye.com/v1/url?k=8e23b012-d08373bb-8e23f089-
> 86959e472243-a2babe7075f8ac38&q=1&e=e3b0051e-bb23-4a30-84c7-
> 7e5e80f83325&u=http%3A%2F%2Fwww.open-
> std.org%2Fjtc1%2Fsc22%2Fwg14%2Fwww%2Fstandards.html].
> >
> > March 2012: GCC 4.7 was released, introducing the __atomic built-ins
> [https://gcc.gnu.org/gcc-4.7/changes.html,
> https://www.gnu.org/software/gcc/gcc-4.7/].
> >
> > March 2013: GCC 4.8 was released [https://www.gnu.org/software/gcc/gcc-
> 4.8/].
> >
> > April 2014: GCC 4.9 was released, introducing C11 atomics (incl.
> <stdatomic.h>) [https://gcc.gnu.org/gcc-4.9/changes.html,
> https://www.gnu.org/software/gcc/gcc-4.9/].
> >
> > June 2014: RHEL7 was released
> > [https://access.redhat.com/articles/3078]. (RHEL7 Beta was released in
> > December 2013, which probably explains why the GA release doesn’t
> > include GCC 4.9.)
> >
> > May 2019 (i.e. one year ago): RHEL8 was released
> [https://access.redhat.com/articles/3078].
> >
> >
> > RHEL7 includes GCC 4.8 only [https://access.redhat.com/solutions/19458],
> and apparently RHEL7 has not been updated to GCC 4.9 with any of its minor
> releases.
> >
> > Should the DPDK project be stuck on "industry standard" GCC atomics,
> unable to use the decade old "official standard" C11 atomics, only because
> we want to support a six year old enterprise Linux distribution? Red Hat
> released a new enterprise version a year ago... perhaps it's time for their
> customers to upgrade, if they want to use the latest and greatest version of
> DPDK.
> 
> 
> Just to be clear - I wasn't arguing for the direct use of GCC built-ins.
> 
> 
> The GCC __atomic built-ins (called directly, or via a DPDK wrapper) do have
> some advantages over C11 atomics. One is that GCC supports 128-bit atomic
> operations, on certain architectures. <rte_atomic.h> already has a 128-bit
> compare-exchange. Also, since the GCC built-ins seem not to bother with
> architectures where atomics would be implemented by means of a lock, they
> are a little easier to use than <stdatomic.h>.
IMO, I do not think we should focus on built-ins vs APIs.

1) Built-ins are supported by both GCC and Clang today. If there is a new compiler in the future, most likely it will support these built-ins.
2) I like the fact that the built-ins always require the memory order parameter. stdatomic.h provides some APIs which do not need memory order (just like rte_atomicNN_xxx APIs). This needs us to implement checks in checkpatch script to avoid using such APIs.
3) If we need to replace the built-ins with APIs in the future, it is a simple search and replace.

If the decision to go with built-ins, turns out to be a bad decision, it can be corrected easily.

I think we should focus on the compiler not generating optimal code for __atomic_thread_fence(__ATOMIC_SEQ_CST) for x86. This is the main reason for these wrappers. From what I have seen, DPDK has tried to provide solutions internally for performance issues caused by compilers.
Given that we have provided 'rte_atomic128_cmp_exchange' (provided because both the compilers were not generating the 128b compare-exchange), I would say we should just provide wrapper for '__atomic_thread_fence' built-in.

> 
> 
> > Are all the other tools required for building DPDK (in the required versions)
> included in RHEL7, or do we require developers to install/upgrade any other
> tools anyway? If so, why not also GCC? DPDK can be used in a cross
> compilation environment, so we are not requiring RHEL7 users to replace
> their GCC 4.7 default compiler.
I have not used RHEL7, Intel CI uses RHEL7, may be they can answer.

> >
> >
> > Furthermore, the DPDK Documentation specifies GCC 4.9+ as a system
> requirement [https://protect2.fireeye.com/v1/url?k=339bad56-6d3b6eff-
> 339bedcd-86959e472243-cb1bf3934c202e3f&q=1&e=e3b0051e-bb23-4a30-
> 84c7-
> 7e5e80f83325&u=https%3A%2F%2Fdoc.dpdk.org%2Fguides%2Flinux_gsg%2F
> sys_reqs.html%23compilation-of-the-dpdk]. If we are stuck on GCC 4.8, the
> documentation should be updated.
This is interesting. Then the CI systems should be upgraded to use GCC 4.9+.

> >
> >
> >>>>> And if some compiler generates code that is suboptimal for a user,
> >> then it
> >>>> should be the choice of the user to either accept it or use a
> >>>> better
> >> compiler.
> >>>> Using a suboptimal compiler will not only affect the user's DPDK
> >> applications,
> >>>> but all applications developed by the user. And if he accepts it
> >>>> for
> >> his other
> >>>> applications, he will also accept it for his DPDK applications.
> >>>>> We could introduce some sort of marker or standardized comment to
> >>>> indicate when functions only exist for backwards compatibility with
> >> ancient
> >>>> compilers and similar, with a reference to documentation describing
> >> why. And
> >>>> when the documented preconditions are no longer relevant, e.g. when
> >> those
> >>>> particular enterprise Linux distributions become obsolete, these
> >> functions
> >>>> become obsolete too, and should be removed. However, getting rid of
> >>>> obsolete cruft will break the ABI. In other words: Added cruft will
> >> never be
> >>>> removed again, so think twice before adding.
>
  

Patch

diff --git a/lib/librte_eal/include/generic/rte_atomic_c11.h b/lib/librte_eal/include/generic/rte_atomic_c11.h
new file mode 100644
index 0000000..20490f4
--- /dev/null
+++ b/lib/librte_eal/include/generic/rte_atomic_c11.h
@@ -0,0 +1,139 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_ATOMIC_C11_H_
+#define _RTE_ATOMIC_C11_H_
+
+#include <rte_common.h>
+
+/**
+ * @file
+ * c11 atomic operations
+ *
+ * This file wraps up compiler (GCC) c11 atomic built-ins.
+ * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+ */
+
+#define memory_order_relaxed __ATOMIC_RELAXED
+#define memory_order_consume __ATOMIC_CONSUME
+#define memory_order_acquire __ATOMIC_ACQUIRE
+#define memory_order_release __ATOMIC_RELEASE
+#define memory_order_acq_rel __ATOMIC_ACQ_REL
+#define memory_order_seq_cst __ATOMIC_SEQ_CST
+
+/* Generic atomic load.
+ * It returns the contents of *PTR.
+ *
+ * The valid memory order variants are:
+ * memory_order_relaxed
+ * memory_order_consume
+ * memory_order_acquire
+ * memory_order_seq_cst
+ */
+#define rte_atomic_load(PTR, MO)			\
+	(__extension__ ({				\
+		typeof(PTR) _ptr = (PTR);		\
+		typeof(*_ptr) _ret;			\
+		__atomic_load(_ptr, &_ret, (MO));	\
+		_ret;					\
+	}))
+
+/* Generic atomic store.
+ * It stores the value of VAL into *PTR.
+ *
+ * The valid memory order variants are:
+ * memory_order_relaxed
+ * memory_order_release
+ * memory_order_seq_cst
+ */
+#define rte_atomic_store(PTR, VAL, MO)			\
+	(__extension__ ({				\
+		typeof(PTR) _ptr = (PTR);		\
+		typeof(*_ptr) _val = (VAL);		\
+		__atomic_store(_ptr, &_val, (MO));	\
+	}))
+
+/* Generic atomic exchange.
+ * It stores the value of VAL into *PTR.
+ * It returns the original value of *PTR.
+ *
+ * The valid memory order variants are:
+ * memory_order_relaxed
+ * memory_order_acquire
+ * memory_order_release
+ * memory_order_acq_rel
+ * memory_order_seq_cst
+ */
+#define rte_atomic_exchange(PTR, VAL, MO)			\
+	(__extension__ ({					\
+		typeof(PTR) _ptr = (PTR);			\
+		typeof(*_ptr) _val = (VAL);			\
+		typeof(*_ptr) _ret;				\
+		__atomic_exchange(_ptr, &_val, &_ret, (MO));	\
+		_ret;						\
+	}))
+
+/* Generic atomic compare and exchange.
+ * It compares the contents of *PTR with the contents of *EXP.
+ * If equal, the operation is a read-modify-write operation that
+ * writes DES into *PTR.
+ * If they are not equal, the operation is a read and the current
+ * contents of *PTR are written into *EXP.
+ *
+ * The weak compare_exchange may fail spuriously and the strong
+ * variation will never fails spuriously.
+ *
+ * If DES is written into *PTR then true is returned and memory is
+ * affected according to the memory order specified by SUC_MO.
+ * There are no restrictions on what memory order can be used here.
+ *
+ * Otherwise, false is returned and memory is affected according to
+ * FAIL_MO. This memory order cannot be memory_order_release nor
+ * memory_order_acq_rel. It also cannot be a stronger order than that
+ * specified by SUC_MO.
+ */
+#define rte_atomic_compare_exchange_weak(PTR, EXP, DES, SUC_MO, FAIL_MO)    \
+	(__extension__ ({						    \
+		typeof(PTR) _ptr = (PTR);				    \
+		typeof(*_ptr) _des = (DES);				    \
+		__atomic_compare_exchange(_ptr, (EXP), &_des, 1,	    \
+				 (SUC_MO), (FAIL_MO));			    \
+	}))
+
+#define rte_atomic_compare_exchange_strong(PTR, EXP, DES, SUC_MO, FAIL_MO)  \
+	(__extension__ ({						    \
+		typeof(PTR) _ptr = (PTR);				    \
+		typeof(*_ptr) _des = (DES);				    \
+		__atomic_compare_exchange(_ptr, (EXP), &_des, 0,	    \
+				 (SUC_MO), (FAIL_MO));			    \
+	}))
+
+#define rte_atomic_fetch_add(PTR, VAL, MO)		\
+	__atomic_fetch_add((PTR), (VAL), (MO))
+#define rte_atomic_fetch_sub(PTR, VAL, MO)		\
+	__atomic_fetch_sub((PTR), (VAL), (MO))
+#define rte_atomic_fetch_or(PTR, VAL, MO)		\
+	__atomic_fetch_or((PTR), (VAL), (MO))
+#define rte_atomic_fetch_xor(PTR, VAL, MO)		\
+	__atomic_fetch_xor((PTR), (VAL), (MO))
+#define rte_atomic_fetch_and(PTR, VAL, MO)		\
+	__atomic_fetch_and((PTR), (VAL), (MO))
+
+#define rte_atomic_add_fetch(PTR, VAL, MO)		\
+	__atomic_add_fetch((PTR), (VAL), (MO))
+#define rte_atomic_sub_fetch(PTR, VAL, MO)		\
+	__atomic_sub_fetch((PTR), (VAL), (MO))
+#define rte_atomic_or_fetch(PTR, VAL, MO)		\
+	__atomic_or_fetch((PTR), (VAL), (MO))
+#define rte_atomic_xor_fetch(PTR, VAL, MO)		\
+	__atomic_xor_fetch((PTR), (VAL), (MO))
+#define rte_atomic_and_fetch(PTR, VAL, MO)		\
+	__atomic_and_fetch((PTR), (VAL), (MO))
+
+/* Synchronization fence between threads based on
+ * the specified memory order.
+ */
+#define rte_atomic_thread_fence(MO) __atomic_thread_fence((MO))
+
+#endif /* _RTE_ATOMIC_C11_H_ */
diff --git a/lib/librte_eal/include/meson.build b/lib/librte_eal/include/meson.build
index bc73ec2..dac1aac 100644
--- a/lib/librte_eal/include/meson.build
+++ b/lib/librte_eal/include/meson.build
@@ -51,6 +51,7 @@  headers += files(
 # special case install the generic headers, since they go in a subdir
 generic_headers = files(
 	'generic/rte_atomic.h',
+	'generic/rte_atomic_c11.h',
 	'generic/rte_byteorder.h',
 	'generic/rte_cpuflags.h',
 	'generic/rte_cycles.h',