[1/2] acl: remove use of weak functions
Checks
Commit Message
Weak functions don't work well with static libraries and require the use of
the "whole-archive" flag to ensure that the correct function is used when
linking. Since the weak functions are only used as placeholders within
this library alone, we can replace them with non-weak functions using
preprocessor ifdefs.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
lib/librte_acl/meson.build | 7 ++++++-
lib/librte_acl/rte_acl.c | 18 ++++++++++++++----
mk/rte.app.mk | 3 ---
3 files changed, 20 insertions(+), 8 deletions(-)
Comments
Bruce Richardson <bruce.richardson@intel.com> writes:
> Weak functions don't work well with static libraries and require the use of
> "whole-archive" flag to ensure that the correct function is used when
> linking. Since the weak functions are only used as placeholders within
> this library alone, we can replace them with non-weak functions using
> preprocessor ifdefs.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> lib/librte_acl/meson.build | 7 ++++++-
> lib/librte_acl/rte_acl.c | 18 ++++++++++++++----
> mk/rte.app.mk | 3 ---
> 3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
> index 2207dbafe..98ece7d85 100644
> --- a/lib/librte_acl/meson.build
> +++ b/lib/librte_acl/meson.build
> @@ -6,7 +6,7 @@ sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c',
> 'rte_acl.c', 'tb_mem.c')
> headers = files('rte_acl.h', 'rte_acl_osdep.h')
>
> -if arch_subdir == 'x86'
> +if dpdk_conf.has('RTE_ARCH_X86')
> sources += files('acl_run_sse.c')
>
> # compile AVX2 version if either:
> @@ -28,4 +28,9 @@ if arch_subdir == 'x86'
> cflags += '-DCC_AVX2_SUPPORT'
> endif
>
> +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
> + cflags += '-flax-vector-conversions'
> + sources += files('acl_run_neon.c')
This will also need -Wno-uninitialized (otherwise it will generate
warnings about the search_neon_4 and search_neon_8 functions).
But I don't like papering over these conversions. I'd prefer instead
the patches I posted at:
http://mails.dpdk.org/archives/dev/2019-April/129540.html
and
http://mails.dpdk.org/archives/dev/2019-April/129541.html
Are you opposed to merging those?
> +elif dpdk_conf.has('RTE_ARCH_PPC_64')
> + sources += files('acl_run_altivec.c')
> endif
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index c436a9bfd..fd5bd5e4e 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -13,11 +13,13 @@ static struct rte_tailq_elem rte_acl_tailq = {
> };
> EAL_REGISTER_TAILQ(rte_acl_tailq)
>
> +#ifndef RTE_ARCH_X86
> +#ifndef CC_AVX2_SUPPORT
> /*
> * If the compiler doesn't support AVX2 instructions,
> * then the dummy one would be used instead for AVX2 classify method.
> */
> -__rte_weak int
> +int
> rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -26,8 +28,9 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> -__rte_weak int
> +int
> rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -36,8 +39,11 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> -__rte_weak int
> +#ifndef RTE_ARCH_ARM
> +#ifndef RTE_ARCH_ARM64
> +int
> rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -46,8 +52,11 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
> +#endif
>
> -__rte_weak int
> +#ifndef RTE_ARCH_PPC_64
> +int
> rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -56,6 +65,7 @@ rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> static const rte_acl_classify_t classify_fns[] = {
> [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 7d994bece..fdec636b4 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -46,10 +46,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor
> _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag
> _LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
> _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm
> -# librte_acl needs --whole-archive because of weak functions
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson
I think I have a solution for this that can use the weak aliasing and
not require the use of the whole-archive flag. Would you prefer that?
On Wed, Apr 10, 2019 at 09:54:02AM -0400, Aaron Conole wrote:
> Bruce Richardson <bruce.richardson@intel.com> writes:
>
> > Weak functions don't work well with static libraries and require the
> > use of "whole-archive" flag to ensure that the correct function is used
> > when linking. Since the weak functions are only used as placeholders
> > within this library alone, we can replace them with non-weak functions
> > using preprocessor ifdefs.
> >
> > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> ---
> > lib/librte_acl/meson.build | 7 ++++++- lib/librte_acl/rte_acl.c | 18
> > ++++++++++++++---- mk/rte.app.mk | 3 --- 3 files changed,
> > 20 insertions(+), 8 deletions(-)
> >
> > diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
> > index 2207dbafe..98ece7d85 100644 --- a/lib/librte_acl/meson.build +++
> > b/lib/librte_acl/meson.build @@ -6,7 +6,7 @@ sources =
> > files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c', 'rte_acl.c',
> > 'tb_mem.c') headers = files('rte_acl.h', 'rte_acl_osdep.h')
> >
> > -if arch_subdir == 'x86' +if dpdk_conf.has('RTE_ARCH_X86') sources +=
> > files('acl_run_sse.c')
> >
> > # compile AVX2 version if either: @@ -28,4 +28,9 @@ if arch_subdir
> > == 'x86' cflags += '-DCC_AVX2_SUPPORT' endif
> >
> > +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
> > + cflags += '-flax-vector-conversions' + sources +=
> > files('acl_run_neon.c')
>
> This will also need -Wno-uninitialized (otherwise it will generate
> warnings about the search_neon_4 and search_neon_8 functions).
>
> But I don't like papering over these conversions. I'd prefer instead the
> patches I posted at:
>
> http://mails.dpdk.org/archives/dev/2019-April/129540.html and
> http://mails.dpdk.org/archives/dev/2019-April/129541.html
>
> Are you opposed to merging those?
>
Nope, not in the least. I'm happy enough to rework this patch on top of
those - I'd just forgotten about them in my rush to get a potential
solution out here. I did these up quickly to show how easy it is to remove
the need for the weak functions and the subsequent linker "--whole-archive"
flag.
/Bruce
PS: I see your patch 2 does not include the -Wno-uninitialized flag. Is it
not needed in your patch, or just an oversight?
Bruce Richardson <bruce.richardson@intel.com> writes:
> On Wed, Apr 10, 2019 at 09:54:02AM -0400, Aaron Conole wrote:
>> Bruce Richardson <bruce.richardson@intel.com> writes:
>>
>> > Weak functions don't work well with static libraries and require the
>> > use of "whole-archive" flag to ensure that the correct function is used
>> > when linking. Since the weak functions are only used as placeholders
>> > within this library alone, we can replace them with non-weak functions
>> > using preprocessor ifdefs.
>> >
>> > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> ---
>> > lib/librte_acl/meson.build | 7 ++++++- lib/librte_acl/rte_acl.c | 18
>> > ++++++++++++++---- mk/rte.app.mk | 3 --- 3 files changed,
>> > 20 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
>> > index 2207dbafe..98ece7d85 100644 --- a/lib/librte_acl/meson.build +++
>> > b/lib/librte_acl/meson.build @@ -6,7 +6,7 @@ sources =
>> > files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c', 'rte_acl.c',
>> > 'tb_mem.c') headers = files('rte_acl.h', 'rte_acl_osdep.h')
>> >
>> > -if arch_subdir == 'x86' +if dpdk_conf.has('RTE_ARCH_X86') sources +=
>> > files('acl_run_sse.c')
>> >
>> > # compile AVX2 version if either: @@ -28,4 +28,9 @@ if arch_subdir
>> > == 'x86' cflags += '-DCC_AVX2_SUPPORT' endif
>> >
>> > +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
>> > + cflags += '-flax-vector-conversions' + sources +=
>> > files('acl_run_neon.c')
>>
>> This will also need -Wno-uninitialized (otherwise it will generate
>> warnings about the search_neon_4 and search_neon_8 functions).
>>
>> But I don't like papering over these conversions. I'd prefer instead the
>> patches I posted at:
>>
>> http://mails.dpdk.org/archives/dev/2019-April/129540.html and
>> http://mails.dpdk.org/archives/dev/2019-April/129541.html
>>
>> Are you opposed to merging those?
>>
> Nope, not in the least. I'm happy enough to rework this patch on top of
> those - I'd just had forgotten about them in my rush to get a potential
> solution out here. I did these up quickly to show how easy it is to remove
> the need for the weak functions and the subsequent linker "--whole-archive"
> flag.
>
> /Bruce
>
> PS: I see your patch 2 does not include the Wno-uninitialized flag, is it
> not needed in your patch, or just an oversight?
It isn't needed; I resolved the issue with an explicit initialization.
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Wednesday, April 10, 2019 2:45 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; aconole@redhat.com
> Cc: dev@dpdk.org; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: [PATCH 1/2] acl: remove use of weak functions
>
> Weak functions don't work well with static libraries and require the use of
> "whole-archive" flag to ensure that the correct function is used when
> linking. Since the weak functions are only used as placeholders within
> this library alone, we can replace them with non-weak functions using
> preprocessor ifdefs.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> lib/librte_acl/meson.build | 7 ++++++-
> lib/librte_acl/rte_acl.c | 18 ++++++++++++++----
> mk/rte.app.mk | 3 ---
> 3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
> index 2207dbafe..98ece7d85 100644
> --- a/lib/librte_acl/meson.build
> +++ b/lib/librte_acl/meson.build
> @@ -6,7 +6,7 @@ sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c',
> 'rte_acl.c', 'tb_mem.c')
> headers = files('rte_acl.h', 'rte_acl_osdep.h')
>
> -if arch_subdir == 'x86'
> +if dpdk_conf.has('RTE_ARCH_X86')
> sources += files('acl_run_sse.c')
>
> # compile AVX2 version if either:
> @@ -28,4 +28,9 @@ if arch_subdir == 'x86'
> cflags += '-DCC_AVX2_SUPPORT'
> endif
>
> +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
> + cflags += '-flax-vector-conversions'
> + sources += files('acl_run_neon.c')
> +elif dpdk_conf.has('RTE_ARCH_PPC_64')
> + sources += files('acl_run_altivec.c')
> endif
> diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
> index c436a9bfd..fd5bd5e4e 100644
> --- a/lib/librte_acl/rte_acl.c
> +++ b/lib/librte_acl/rte_acl.c
> @@ -13,11 +13,13 @@ static struct rte_tailq_elem rte_acl_tailq = {
> };
> EAL_REGISTER_TAILQ(rte_acl_tailq)
>
> +#ifndef RTE_ARCH_X86
> +#ifndef CC_AVX2_SUPPORT
> /*
> * If the compiler doesn't support AVX2 instructions,
> * then the dummy one would be used instead for AVX2 classify method.
> */
> -__rte_weak int
> +int
> rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -26,8 +28,9 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> -__rte_weak int
> +int
> rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -36,8 +39,11 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> -__rte_weak int
> +#ifndef RTE_ARCH_ARM
> +#ifndef RTE_ARCH_ARM64
> +int
> rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -46,8 +52,11 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
> +#endif
>
> -__rte_weak int
> +#ifndef RTE_ARCH_PPC_64
> +int
> rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
> __rte_unused const uint8_t **data,
> __rte_unused uint32_t *results,
> @@ -56,6 +65,7 @@ rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
> {
> return -ENOTSUP;
> }
> +#endif
>
> static const rte_acl_classify_t classify_fns[] = {
> [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 7d994bece..fdec636b4 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -46,10 +46,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor
> _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag
> _LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
> _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm
> -# librte_acl needs --whole-archive because of weak functions
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive
> _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson
> --
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Tested-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 2.20.1
@@ -6,7 +6,7 @@ sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c',
'rte_acl.c', 'tb_mem.c')
headers = files('rte_acl.h', 'rte_acl_osdep.h')
-if arch_subdir == 'x86'
+if dpdk_conf.has('RTE_ARCH_X86')
sources += files('acl_run_sse.c')
# compile AVX2 version if either:
@@ -28,4 +28,9 @@ if arch_subdir == 'x86'
cflags += '-DCC_AVX2_SUPPORT'
endif
+elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
+ cflags += '-flax-vector-conversions'
+ sources += files('acl_run_neon.c')
+elif dpdk_conf.has('RTE_ARCH_PPC_64')
+ sources += files('acl_run_altivec.c')
endif
@@ -13,11 +13,13 @@ static struct rte_tailq_elem rte_acl_tailq = {
};
EAL_REGISTER_TAILQ(rte_acl_tailq)
+#ifndef RTE_ARCH_X86
+#ifndef CC_AVX2_SUPPORT
/*
* If the compiler doesn't support AVX2 instructions,
* then the dummy one would be used instead for AVX2 classify method.
*/
-__rte_weak int
+int
rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
__rte_unused const uint8_t **data,
__rte_unused uint32_t *results,
@@ -26,8 +28,9 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
{
return -ENOTSUP;
}
+#endif
-__rte_weak int
+int
rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
__rte_unused const uint8_t **data,
__rte_unused uint32_t *results,
@@ -36,8 +39,11 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
{
return -ENOTSUP;
}
+#endif
-__rte_weak int
+#ifndef RTE_ARCH_ARM
+#ifndef RTE_ARCH_ARM64
+int
rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
__rte_unused const uint8_t **data,
__rte_unused uint32_t *results,
@@ -46,8 +52,11 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
{
return -ENOTSUP;
}
+#endif
+#endif
-__rte_weak int
+#ifndef RTE_ARCH_PPC_64
+int
rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
__rte_unused const uint8_t **data,
__rte_unused uint32_t *results,
@@ -56,6 +65,7 @@ rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
{
return -ENOTSUP;
}
+#endif
static const rte_acl_classify_t classify_fns[] = {
[RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
@@ -46,10 +46,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor
_LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag
_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm
-# librte_acl needs --whole-archive because of weak functions
-_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl
-_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed
_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson