[v6,03/13] vhost: try to unroll for each loop
Checks
Commit Message
Create a macro that adds an unroll pragma in front of a for-each loop.
Batch functions will consist of several small loops, which can be
optimized by the compiler's loop-unrolling pragma.
Signed-off-by: Marvin Liu <yong.liu@intel.com>
Comments
Hi Marvin,
On 10/15/19 6:07 PM, Marvin Liu wrote:
> Create a macro that adds an unroll pragma in front of a for-each loop.
> Batch functions will consist of several small loops, which can be
> optimized by the compiler's loop-unrolling pragma.
>
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
>
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index 8623e91c0..30839a001 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -16,6 +16,24 @@ CFLAGS += -I vhost_user
> CFLAGS += -fno-strict-aliasing
> LDLIBS += -lpthread
>
> +ifeq ($(RTE_TOOLCHAIN), gcc)
> +ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
> +CFLAGS += -DSUPPORT_GCC_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), clang)
> +ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
> +CFLAGS += -DSUPPORT_CLANG_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), icc)
> +ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
> +CFLAGS += -DSUPPORT_ICC_UNROLL_PRAGMA
> +endif
> +endif
> +
> ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
> LDLIBS += -lnuma
> endif
> diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
> index cb1123ae3..ddf0ee579 100644
> --- a/lib/librte_vhost/meson.build
> +++ b/lib/librte_vhost/meson.build
> @@ -8,6 +8,13 @@ endif
> if has_libnuma == 1
> dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)
> endif
> +if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
> + cflags += '-DSUPPORT_GCC_UNROLL_PRAGMA'
> +elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
> + cflags += '-DSUPPORT_CLANG_UNROLL_PRAGMA'
> +elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
> + cflags += '-DSUPPORT_ICC_UNROLL_PRAGMA'
> +endif
> dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
> cc.has_header('linux/userfaultfd.h'))
> version = 4
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 22a3ddc38..18d01cb19 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -39,6 +39,30 @@
>
> #define VHOST_LOG_CACHE_NR 32
>
> +#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
> + sizeof(struct vring_packed_desc))
> +#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
> +
> +#ifdef SUPPORT_GCC_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef SUPPORT_CLANG_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef SUPPORT_ICC_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
> + for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifndef for_each_try_unroll
> +#define for_each_try_unroll(iter, val, num) \
> + for (iter = val; iter < num; iter++)
> +#endif
> +
> /**
> * Structure contains buffer address, length and descriptor index
> * from vring to do scatter RX.
>
As this is vhost-specific, please prefix all the new defines and macros
with VHOST_.
Thanks,
Maxime
@@ -16,6 +16,24 @@ CFLAGS += -I vhost_user
CFLAGS += -fno-strict-aliasing
LDLIBS += -lpthread
+ifeq ($(RTE_TOOLCHAIN), gcc)
+ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
+CFLAGS += -DSUPPORT_GCC_UNROLL_PRAGMA
+endif
+endif
+
+ifeq ($(RTE_TOOLCHAIN), clang)
+ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
+CFLAGS += -DSUPPORT_CLANG_UNROLL_PRAGMA
+endif
+endif
+
+ifeq ($(RTE_TOOLCHAIN), icc)
+ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
+CFLAGS += -DSUPPORT_ICC_UNROLL_PRAGMA
+endif
+endif
+
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
LDLIBS += -lnuma
endif
@@ -8,6 +8,13 @@ endif
if has_libnuma == 1
dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)
endif
+if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
+ cflags += '-DSUPPORT_GCC_UNROLL_PRAGMA'
+elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
+ cflags += '-DSUPPORT_CLANG_UNROLL_PRAGMA'
+elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
+ cflags += '-DSUPPORT_ICC_UNROLL_PRAGMA'
+endif
dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
cc.has_header('linux/userfaultfd.h'))
version = 4
@@ -39,6 +39,30 @@
#define VHOST_LOG_CACHE_NR 32
+#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
+ sizeof(struct vring_packed_desc))
+#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
+
+#ifdef SUPPORT_GCC_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
+ for (iter = val; iter < size; iter++)
+#endif
+
+#ifdef SUPPORT_CLANG_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
+ for (iter = val; iter < size; iter++)
+#endif
+
+#ifdef SUPPORT_ICC_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
+ for (iter = val; iter < size; iter++)
+#endif
+
+#ifndef for_each_try_unroll
+#define for_each_try_unroll(iter, val, num) \
+ for (iter = val; iter < num; iter++)
+#endif
+
/**
* Structure contains buffer address, length and descriptor index
* from vring to do scatter RX.