[09/27] event/dlb: add inline functions used in multiple files

Message ID 1593232671-5690-10-git-send-email-timothy.mcdaniel@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series event/dlb Intel DLB PMD |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Timothy McDaniel June 27, 2020, 4:37 a.m. UTC
  From: "McDaniel, Timothy" <timothy.mcdaniel@intel.com>

Signed-off-by: McDaniel, Timothy <timothy.mcdaniel@intel.com>
---
 drivers/event/dlb/dlb_inline_fns.h |   80 ++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 drivers/event/dlb/dlb_inline_fns.h
  

Comments

Bruce Richardson July 7, 2020, 12:02 p.m. UTC | #1
On Fri, Jun 26, 2020 at 11:37:33PM -0500, Tim McDaniel wrote:
> From: "McDaniel, Timothy" <timothy.mcdaniel@intel.com>
> 
> Signed-off-by: McDaniel, Timothy <timothy.mcdaniel@intel.com>
> ---
>  drivers/event/dlb/dlb_inline_fns.h |   80 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 80 insertions(+)
>  create mode 100644 drivers/event/dlb/dlb_inline_fns.h
> 
> diff --git a/drivers/event/dlb/dlb_inline_fns.h b/drivers/event/dlb/dlb_inline_fns.h
> new file mode 100644
> index 0000000..86f85aa
> --- /dev/null
> +++ b/drivers/event/dlb/dlb_inline_fns.h
> @@ -0,0 +1,80 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2016-2020 Intel Corporation
> + */
> +
> +#include "rte_memcpy.h"
> +#include "rte_io.h"
> +
> +/* Inline functions required in more than one source file.
> + */
> +
> +static inline struct dlb_eventdev *
> +dlb_pmd_priv(const struct rte_eventdev *eventdev)
> +{
> +	return eventdev->data->dev_private;
> +}
> +
> +static inline void
> +dlb_umonitor(volatile void *addr)
> +{
> +	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
> +			:
> +			: "D" (addr));
> +}
> +
> +static inline void
> +dlb_umwait(int state, uint64_t timeout)
> +{
> +	uint32_t eax = timeout & UINT32_MAX;
> +	uint32_t edx = timeout >> 32;
> +
> +	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
> +			:
> +			: "D" (state),  "a" (eax), "d" (edx));
> +}
> +
> +static inline void
> +dlb_movntdq(void *qe4, void *pp_addr)
> +{
> +	/* Move entire 64B cache line of QEs, 128 bits (16B) at a time. */
> +	long long *_qe  = (long long *)qe4;
> +	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
> +	__v2di src_data1 = (__v2di){_qe[2], _qe[3]};
> +	__v2di src_data2 = (__v2di){_qe[4], _qe[5]};
> +	__v2di src_data3 = (__v2di){_qe[6], _qe[7]};
> +
> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 0, (__v2di)src_data0);
> +	rte_wmb();
> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 1, (__v2di)src_data1);
> +	rte_wmb();
> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 2, (__v2di)src_data2);
> +	rte_wmb();
> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 3, (__v2di)src_data3);
> +	rte_wmb();
> +}
> +
> +static inline void
> +dlb_movntdq_single(void *qe4, void *pp_addr)
> +{
> +	long long *_qe  = (long long *)qe4;
> +	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
> +
> +	__builtin_ia32_movntdq((__v2di *)pp_addr, (__v2di)src_data0);
> +}
> +
> +static inline void
> +dlb_cldemote(void *addr)
> +{
> +	/* Load addr into RSI, then demote the cache line of the address
> +	 * contained in that register.
> +	 */
> +	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (addr));
> +}
> +
> +static inline void
> +dlb_movdir64b(void *qe4, void *pp_addr)
> +{
> +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> +		     :
> +		     : "a" (pp_addr), "d" (qe4));
> +}
> -- 

Although these are mostly x86-specific, a number of these functions may be
needed in other places in DPDK also, so they should perhaps be put in a
common location.

Also, for the use of these, can you try and align the parameter names and
order as far as possible with those of the equivalent x86 intrinsics
documented at
https://software.intel.com/sites/landingpage/IntrinsicsGuide/. This will
make it easier in the future to switch to use the proper intrinsics rather
than our own versions. For example, _movdir64b intrinsic has parameters
"void *dst, const void *src", which is reversed to what you have here.

Thanks,
/Bruce
  
Timothy McDaniel July 7, 2020, 2:33 p.m. UTC | #2
>-----Original Message-----
>From: Bruce Richardson <bruce.richardson@intel.com>
>Sent: Tuesday, July 7, 2020 7:03 AM
>To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>Cc: jerinj@marvell.com; mattias.ronnblom@ericsson.com; dev@dpdk.org; Eads,
>Gage <gage.eads@intel.com>; Van Haaren, Harry <harry.van.haaren@intel.com>
>Subject: Re: [dpdk-dev] [PATCH 09/27] event/dlb: add inline functions used in
>multiple files
>
>On Fri, Jun 26, 2020 at 11:37:33PM -0500, Tim McDaniel wrote:
>> From: "McDaniel, Timothy" <timothy.mcdaniel@intel.com>
>>
>> Signed-off-by: McDaniel, Timothy <timothy.mcdaniel@intel.com>
>> ---
>>  drivers/event/dlb/dlb_inline_fns.h |   80
>++++++++++++++++++++++++++++++++++++
>>  1 file changed, 80 insertions(+)
>>  create mode 100644 drivers/event/dlb/dlb_inline_fns.h
>>
>> diff --git a/drivers/event/dlb/dlb_inline_fns.h
>b/drivers/event/dlb/dlb_inline_fns.h
>> new file mode 100644
>> index 0000000..86f85aa
>> --- /dev/null
>> +++ b/drivers/event/dlb/dlb_inline_fns.h
>> @@ -0,0 +1,80 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2016-2020 Intel Corporation
>> + */
>> +
>> +#include "rte_memcpy.h"
>> +#include "rte_io.h"
>> +
>> +/* Inline functions required in more than one source file.
>> + */
>> +
>> +static inline struct dlb_eventdev *
>> +dlb_pmd_priv(const struct rte_eventdev *eventdev)
>> +{
>> +	return eventdev->data->dev_private;
>> +}
>> +
>> +static inline void
>> +dlb_umonitor(volatile void *addr)
>> +{
>> +	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
>> +			:
>> +			: "D" (addr));
>> +}
>> +
>> +static inline void
>> +dlb_umwait(int state, uint64_t timeout)
>> +{
>> +	uint32_t eax = timeout & UINT32_MAX;
>> +	uint32_t edx = timeout >> 32;
>> +
>> +	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
>> +			:
>> +			: "D" (state),  "a" (eax), "d" (edx));
>> +}
>> +
>> +static inline void
>> +dlb_movntdq(void *qe4, void *pp_addr)
>> +{
>> +	/* Move entire 64B cache line of QEs, 128 bits (16B) at a time. */
>> +	long long *_qe  = (long long *)qe4;
>> +	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
>> +	__v2di src_data1 = (__v2di){_qe[2], _qe[3]};
>> +	__v2di src_data2 = (__v2di){_qe[4], _qe[5]};
>> +	__v2di src_data3 = (__v2di){_qe[6], _qe[7]};
>> +
>> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 0, (__v2di)src_data0);
>> +	rte_wmb();
>> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 1, (__v2di)src_data1);
>> +	rte_wmb();
>> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 2, (__v2di)src_data2);
>> +	rte_wmb();
>> +	__builtin_ia32_movntdq((__v2di *)pp_addr + 3, (__v2di)src_data3);
>> +	rte_wmb();
>> +}
>> +
>> +static inline void
>> +dlb_movntdq_single(void *qe4, void *pp_addr)
>> +{
>> +	long long *_qe  = (long long *)qe4;
>> +	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
>> +
>> +	__builtin_ia32_movntdq((__v2di *)pp_addr, (__v2di)src_data0);
>> +}
>> +
>> +static inline void
>> +dlb_cldemote(void *addr)
>> +{
>> +	/* Load addr into RSI, then demote the cache line of the address
>> +	 * contained in that register.
>> +	 */
>> +	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (addr));
>> +}
>> +
>> +static inline void
>> +dlb_movdir64b(void *qe4, void *pp_addr)
>> +{
>> +	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
>> +		     :
>> +		     : "a" (pp_addr), "d" (qe4));
>> +}
>> --
>
>Although these are mostly x86-specific, a number of these functions may be
>needed in other places in DPDK also, so they should perhaps be put in a
>common location.
>
>Also, for the use of these, can you try and align the parameter names and
>order as far as possible with those of the equivalent x86 intrinsics
>documented at
>https://software.intel.com/sites/landingpage/IntrinsicsGuide/. This will
>make it easier in the future to switch to use the proper intrinsics rather
>than our own versions. For example, _movdir64b intrinsic has parameters
>"void *dst, const void *src", which is reversed to what you have here.
>
>Thanks,
>/Bruce

Thanks for the suggestions, Bruce. I will make those changes.
  

Patch

diff --git a/drivers/event/dlb/dlb_inline_fns.h b/drivers/event/dlb/dlb_inline_fns.h
new file mode 100644
index 0000000..86f85aa
--- /dev/null
+++ b/drivers/event/dlb/dlb_inline_fns.h
@@ -0,0 +1,80 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2020 Intel Corporation
+ */
+
+#include "rte_memcpy.h"
+#include "rte_io.h"
+
+/* Inline functions required in more than one source file.
+ */
+
+static inline struct dlb_eventdev *
+dlb_pmd_priv(const struct rte_eventdev *eventdev)
+{
+	return eventdev->data->dev_private;
+}
+
+static inline void
+dlb_umonitor(volatile void *addr)
+{
+	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
+			:
+			: "D" (addr));
+}
+
+static inline void
+dlb_umwait(int state, uint64_t timeout)
+{
+	uint32_t eax = timeout & UINT32_MAX;
+	uint32_t edx = timeout >> 32;
+
+	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
+			:
+			: "D" (state),  "a" (eax), "d" (edx));
+}
+
+static inline void
+dlb_movntdq(void *qe4, void *pp_addr)
+{
+	/* Move entire 64B cache line of QEs, 128 bits (16B) at a time. */
+	long long *_qe  = (long long *)qe4;
+	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
+	__v2di src_data1 = (__v2di){_qe[2], _qe[3]};
+	__v2di src_data2 = (__v2di){_qe[4], _qe[5]};
+	__v2di src_data3 = (__v2di){_qe[6], _qe[7]};
+
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 0, (__v2di)src_data0);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 1, (__v2di)src_data1);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 2, (__v2di)src_data2);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 3, (__v2di)src_data3);
+	rte_wmb();
+}
+
+static inline void
+dlb_movntdq_single(void *qe4, void *pp_addr)
+{
+	long long *_qe  = (long long *)qe4;
+	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
+
+	__builtin_ia32_movntdq((__v2di *)pp_addr, (__v2di)src_data0);
+}
+
+static inline void
+dlb_cldemote(void *addr)
+{
+	/* Load addr into RSI, then demote the cache line of the address
+	 * contained in that register.
+	 */
+	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (addr));
+}
+
+static inline void
+dlb_movdir64b(void *qe4, void *pp_addr)
+{
+	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+		     :
+		     : "a" (pp_addr), "d" (qe4));
+}