[v6,03/13] vhost: try to unroll for each loop

Message ID 20191015160739.51940-4-yong.liu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vhost packed ring performance optimization |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Marvin Liu Oct. 15, 2019, 4:07 p.m. UTC
  Create a macro that adds an unroll pragma before a for-each loop. Batch
functions will consist of several small loops, which can be optimized
by the compilers' loop-unrolling pragmas.

Signed-off-by: Marvin Liu <yong.liu@intel.com>
  

Comments

Maxime Coquelin Oct. 16, 2019, 10:30 a.m. UTC | #1
Hi Marvin,

On 10/15/19 6:07 PM, Marvin Liu wrote:
> Create a macro that adds an unroll pragma before a for-each loop. Batch
> functions will consist of several small loops, which can be optimized
> by the compilers' loop-unrolling pragmas.
> 
> Signed-off-by: Marvin Liu <yong.liu@intel.com>
> 
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index 8623e91c0..30839a001 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -16,6 +16,24 @@ CFLAGS += -I vhost_user
>  CFLAGS += -fno-strict-aliasing
>  LDLIBS += -lpthread
>  
> +ifeq ($(RTE_TOOLCHAIN), gcc)
> +ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
> +CFLAGS += -DSUPPORT_GCC_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), clang)
> +ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
> +CFLAGS += -DSUPPORT_CLANG_UNROLL_PRAGMA
> +endif
> +endif
> +
> +ifeq ($(RTE_TOOLCHAIN), icc)
> +ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
> +CFLAGS += -DSUPPORT_ICC_UNROLL_PRAGMA
> +endif
> +endif
> +
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
>  LDLIBS += -lnuma
>  endif
> diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
> index cb1123ae3..ddf0ee579 100644
> --- a/lib/librte_vhost/meson.build
> +++ b/lib/librte_vhost/meson.build
> @@ -8,6 +8,13 @@ endif
>  if has_libnuma == 1
>  	dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)
>  endif
> +if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
> +	cflags += '-DSUPPORT_GCC_UNROLL_PRAGMA'
> +elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
> +	cflags += '-DSUPPORT_CLANG_UNROLL_PRAGMA'
> +elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
> +	cflags += '-DSUPPORT_ICC_UNROLL_PRAGMA'
> +endif
>  dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
>  	      cc.has_header('linux/userfaultfd.h'))
>  version = 4
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 22a3ddc38..18d01cb19 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -39,6 +39,30 @@
>  
>  #define VHOST_LOG_CACHE_NR 32
>  
> +#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
> +			    sizeof(struct vring_packed_desc))
> +#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
> +
> +#ifdef SUPPORT_GCC_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
> +	for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef SUPPORT_CLANG_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
> +	for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifdef SUPPORT_ICC_UNROLL_PRAGMA
> +#define for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
> +	for (iter = val; iter < size; iter++)
> +#endif
> +
> +#ifndef for_each_try_unroll
> +#define for_each_try_unroll(iter, val, num) \
> +	for (iter = val; iter < num; iter++)
> +#endif
> +
>  /**
>   * Structure contains buffer address, length and descriptor index
>   * from vring to do scatter RX.
> 

As this is vhost-specific, please prefix all the defines and macros with
VHOST_.

Thanks,
Maxime
  

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 8623e91c0..30839a001 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -16,6 +16,24 @@  CFLAGS += -I vhost_user
 CFLAGS += -fno-strict-aliasing
 LDLIBS += -lpthread
 
+ifeq ($(RTE_TOOLCHAIN), gcc)
+ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
+CFLAGS += -DSUPPORT_GCC_UNROLL_PRAGMA
+endif
+endif
+
+ifeq ($(RTE_TOOLCHAIN), clang)
+ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
+CFLAGS += -DSUPPORT_CLANG_UNROLL_PRAGMA
+endif
+endif
+
+ifeq ($(RTE_TOOLCHAIN), icc)
+ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
+CFLAGS += -DSUPPORT_ICC_UNROLL_PRAGMA
+endif
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build
index cb1123ae3..ddf0ee579 100644
--- a/lib/librte_vhost/meson.build
+++ b/lib/librte_vhost/meson.build
@@ -8,6 +8,13 @@  endif
 if has_libnuma == 1
 	dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)
 endif
+if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
+	cflags += '-DSUPPORT_GCC_UNROLL_PRAGMA'
+elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
+	cflags += '-DSUPPORT_CLANG_UNROLL_PRAGMA'
+elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
+	cflags += '-DSUPPORT_ICC_UNROLL_PRAGMA'
+endif
 dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
 	      cc.has_header('linux/userfaultfd.h'))
 version = 4
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 22a3ddc38..18d01cb19 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -39,6 +39,30 @@ 
 
 #define VHOST_LOG_CACHE_NR 32
 
+#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
+			    sizeof(struct vring_packed_desc))
+#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
+
+#ifdef SUPPORT_GCC_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
+	for (iter = val; iter < size; iter++)
+#endif
+
+#ifdef SUPPORT_CLANG_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
+	for (iter = val; iter < size; iter++)
+#endif
+
+#ifdef SUPPORT_ICC_UNROLL_PRAGMA
+#define for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
+	for (iter = val; iter < size; iter++)
+#endif
+
+#ifndef for_each_try_unroll
+#define for_each_try_unroll(iter, val, num) \
+	for (iter = val; iter < num; iter++)
+#endif
+
 /**
  * Structure contains buffer address, length and descriptor index
  * from vring to do scatter RX.