[v2,05/22] event/dlb2: add inline functions

Message ID 1602958879-8558-6-git-send-email-timothy.mcdaniel@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series Add DLB2 PMD |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Timothy McDaniel Oct. 17, 2020, 6:21 p.m. UTC
  Add miscellaneous inline functions that may be called
from multiple files.  These functions include inline
assembly of new x86 instructions, such as movdir64b,
since they are not available as builtin functions in
the minimum supported GCC version.

Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
Reviewed-by: Gage Eads <gage.eads@intel.com>
---
 drivers/event/dlb2/dlb2_inline_fns.h | 81 ++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 drivers/event/dlb2/dlb2_inline_fns.h
  

Comments

Jerin Jacob Oct. 18, 2020, 8:59 a.m. UTC | #1
On Sat, Oct 17, 2020 at 11:50 PM Timothy McDaniel
<timothy.mcdaniel@intel.com> wrote:
>
> Add miscellaneous inline functions that may be called
> from multiple files.  These functions include inline
> assembly of new x86 instructions, such as movdir64b,
> since they are not available as builtin functions in
> the minimum supported GCC version.
>
> Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
> Reviewed-by: Gage Eads <gage.eads@intel.com>
> ---
>  drivers/event/dlb2/dlb2_inline_fns.h | 81 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
>  create mode 100644 drivers/event/dlb2/dlb2_inline_fns.h
>
> diff --git a/drivers/event/dlb2/dlb2_inline_fns.h b/drivers/event/dlb2/dlb2_inline_fns.h
> new file mode 100644
> index 0000000..9c3c36f
> --- /dev/null
> +++ b/drivers/event/dlb2/dlb2_inline_fns.h
> @@ -0,0 +1,81 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2016-2020 Intel Corporation
> + */
> +
> +#ifndef _DLB2_INLINE_FNS_H_
> +#define _DLB2_INLINE_FNS_H_
> +
> +/* Inline functions required in more than one source file. */
> +
> +static inline struct dlb2_eventdev *
> +dlb2_pmd_priv(const struct rte_eventdev *eventdev)
> +{
> +       return eventdev->data->dev_private;
> +}
> +
> +static inline void
> +dlb2_umonitor(volatile void *addr)
> +{
> +       asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
> +                       :
> +                       : "D" (addr));
> +}
> +
> +static inline void
> +dlb2_umwait(int state, uint64_t timeout)
> +{
> +       uint32_t eax = timeout & UINT32_MAX;
> +       uint32_t edx = timeout >> 32;
> +
> +       asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
> +                       :
> +                       : "D" (state),  "a" (eax), "d" (edx));
> +}
> +

Please change this instruction to use new rte public API once it gets merged.

> +static inline void
> +dlb2_movntdq(void *qe4, void *pp_addr)
> +{
> +       /* Move entire 64B cache line of QEs, 128 bits (16B) at a time. */
> +       long long *_qe  = (long long *)qe4;
> +       __v2di src_data0 = (__v2di){_qe[0], _qe[1]};
> +       __v2di src_data1 = (__v2di){_qe[2], _qe[3]};
> +       __v2di src_data2 = (__v2di){_qe[4], _qe[5]};
> +       __v2di src_data3 = (__v2di){_qe[6], _qe[7]};
> +
> +       __builtin_ia32_movntdq((__v2di *)pp_addr + 0, (__v2di)src_data0);
> +       rte_wmb();
> +       __builtin_ia32_movntdq((__v2di *)pp_addr + 1, (__v2di)src_data1);
> +       rte_wmb();
> +       __builtin_ia32_movntdq((__v2di *)pp_addr + 2, (__v2di)src_data2);
> +       rte_wmb();
> +       __builtin_ia32_movntdq((__v2di *)pp_addr + 3, (__v2di)src_data3);
> +       rte_wmb();
> +}
> +
> +static inline void
> +dlb2_movntdq_single(void *qe4, void *pp_addr)
> +{
> +       long long *_qe  = (long long *)qe4;
> +       __v2di src_data0 = (__v2di){_qe[0], _qe[1]};
> +
> +       __builtin_ia32_movntdq((__v2di *)pp_addr, (__v2di)src_data0);
> +}
> +
> +static inline void
> +dlb2_cldemote(void *addr)
> +{
> +       /* Load addr into RSI, then demote the cache line of the address
> +        * contained in that register.
> +        */
> +       asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (addr));
> +}
> +
> +static inline void
> +dlb2_movdir64b(void *qe4, void *pp_addr)
> +{
> +       asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> +                    :
> +                    : "a" (pp_addr), "d" (qe4));
> +}
> +
> +#endif /* _DLB2_INLINE_FNS_H_ */
> --
> 2.6.4
>
  
Timothy McDaniel Oct. 20, 2020, 2:08 p.m. UTC | #2
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Sunday, October 18, 2020 4:00 AM
> To: McDaniel, Timothy <timothy.mcdaniel@intel.com>
> Cc: dpdk-dev <dev@dpdk.org>; Carrillo, Erik G <Erik.G.Carrillo@intel.com>;
> Eads, Gage <gage.eads@intel.com>; Van Haaren, Harry
> <harry.van.haaren@intel.com>; Jerin Jacob <jerinj@marvell.com>
> Subject: Re: [dpdk-dev] [PATCH v2 05/22] event/dlb2: add inline functions
> 
> On Sat, Oct 17, 2020 at 11:50 PM Timothy McDaniel
> <timothy.mcdaniel@intel.com> wrote:
> >
> > Add miscellaneous inline functions that may be called
> > from multiple files.  These functions include inline
> > assembly of new x86 instructions, such as movdir64b,
> > since they are not available as builtin functions in
> > the minimum supported GCC version.
> >
> > Signed-off-by: Timothy McDaniel <timothy.mcdaniel@intel.com>
> > Reviewed-by: Gage Eads <gage.eads@intel.com>
> > ---
> >  drivers/event/dlb2/dlb2_inline_fns.h | 81
> ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 81 insertions(+)
> >  create mode 100644 drivers/event/dlb2/dlb2_inline_fns.h
> >
> > diff --git a/drivers/event/dlb2/dlb2_inline_fns.h
> b/drivers/event/dlb2/dlb2_inline_fns.h
> > new file mode 100644
> > index 0000000..9c3c36f
> > --- /dev/null
> > +++ b/drivers/event/dlb2/dlb2_inline_fns.h
> > @@ -0,0 +1,81 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2016-2020 Intel Corporation
> > + */
> > +
> > +#ifndef _DLB2_INLINE_FNS_H_
> > +#define _DLB2_INLINE_FNS_H_
> > +
> > +/* Inline functions required in more than one source file. */
> > +
> > +static inline struct dlb2_eventdev *
> > +dlb2_pmd_priv(const struct rte_eventdev *eventdev)
> > +{
> > +       return eventdev->data->dev_private;
> > +}
> > +
> > +static inline void
> > +dlb2_umonitor(volatile void *addr)
> > +{
> > +       asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
> > +                       :
> > +                       : "D" (addr));
> > +}
> > +
> > +static inline void
> > +dlb2_umwait(int state, uint64_t timeout)
> > +{
> > +       uint32_t eax = timeout & UINT32_MAX;
> > +       uint32_t edx = timeout >> 32;
> > +
> > +       asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
> > +                       :
> > +                       : "D" (state),  "a" (eax), "d" (edx));
> > +}
> > +
> 
> Please change this instruction to use new rte public API once it gets merged.
> 

Yes, we will do that.
  

Patch

diff --git a/drivers/event/dlb2/dlb2_inline_fns.h b/drivers/event/dlb2/dlb2_inline_fns.h
new file mode 100644
index 0000000..9c3c36f
--- /dev/null
+++ b/drivers/event/dlb2/dlb2_inline_fns.h
@@ -0,0 +1,81 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2020 Intel Corporation
+ */
+
+#ifndef _DLB2_INLINE_FNS_H_
+#define _DLB2_INLINE_FNS_H_
+
+/* Inline functions required in more than one source file. */
+
+/* Return the driver-private data pointer stored in the eventdev's data area
+ * (eventdev->data->dev_private), typed as the DLB2 device structure.
+ */
+static inline struct dlb2_eventdev *
+dlb2_pmd_priv(const struct rte_eventdev *eventdev)
+{
+	struct dlb2_eventdev *priv = eventdev->data->dev_private;
+
+	return priv;
+}
+
+/* Execute a hand-encoded UMONITOR instruction on addr.
+ *
+ * The opcode is emitted as raw bytes (f3 0f ae f7) instead of a mnemonic or
+ * builtin. The "D" constraint places addr in the DI register, which is the
+ * register operand selected by the final ModRM byte (0xf7).
+ * NOTE(review): per the Intel SDM this arms address monitoring for a later
+ * UMWAIT - confirm against the callers' usage.
+ */
+static inline void
+dlb2_umonitor(volatile void *addr)
+{
+	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7\t\n"
+			:
+			: "D" (addr));
+}
+
+/* Execute a hand-encoded UMWAIT instruction (raw bytes f2 0f ae f7).
+ *
+ * state is passed in the DI register. The 64-bit timeout is split into its
+ * low 32 bits (passed in EAX) and its high 32 bits (passed in EDX).
+ * NOTE(review): per the Intel SDM, EDX:EAX is a TSC deadline and the state
+ * register selects the optimized wait state - confirm with the callers.
+ * TODO: the reviewer of this patch asked for this to be replaced with the
+ * public rte API once that API is merged.
+ */
+static inline void
+dlb2_umwait(int state, uint64_t timeout)
+{
+	uint32_t eax = timeout & UINT32_MAX;
+	uint32_t edx = timeout >> 32;
+
+	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7\t\n"
+			:
+			: "D" (state),  "a" (eax), "d" (edx));
+}
+
+/* Copy 64 bytes from qe4 to pp_addr as four 16-byte non-temporal stores.
+ *
+ * The eight 64-bit words at qe4 are gathered into four vector temporaries.
+ * Each MOVNTDQ store is followed by rte_wmb(), so the four 16-byte pieces
+ * are written in ascending address order with a write barrier after each.
+ * NOTE(review): MOVNTDQ requires a 16-byte aligned destination, so pp_addr
+ * is presumably at least 16B aligned - confirm with the callers.
+ */
+static inline void
+dlb2_movntdq(void *qe4, void *pp_addr)
+{
+	/* Move entire 64B cache line of QEs, 128 bits (16B) at a time. */
+	long long *_qe  = (long long *)qe4;
+	__v2di src_data0 = (__v2di){_qe[0], _qe[1]};
+	__v2di src_data1 = (__v2di){_qe[2], _qe[3]};
+	__v2di src_data2 = (__v2di){_qe[4], _qe[5]};
+	__v2di src_data3 = (__v2di){_qe[6], _qe[7]};
+
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 0, (__v2di)src_data0);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 1, (__v2di)src_data1);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 2, (__v2di)src_data2);
+	rte_wmb();
+	__builtin_ia32_movntdq((__v2di *)pp_addr + 3, (__v2di)src_data3);
+	rte_wmb();
+}
+
+/* Copy the first 16 bytes at qe4 to pp_addr with one non-temporal store.
+ *
+ * Unlike dlb2_movntdq(), no write barrier is issued after the store here.
+ * NOTE(review): MOVNTDQ requires a 16-byte aligned destination, so pp_addr
+ * is presumably at least 16B aligned - confirm with the callers.
+ */
+static inline void
+dlb2_movntdq_single(void *qe4, void *pp_addr)
+{
+	const long long *words = qe4;
+	__v2di first_qe = { words[0], words[1] };
+	__v2di *dst = pp_addr;
+
+	__builtin_ia32_movntdq(dst, first_qe);
+}
+
+/* Execute a hand-encoded CLDEMOTE instruction (raw bytes 0f 1c 06) on the
+ * cache line containing addr.
+ * NOTE(review): per the Intel SDM this is only a performance hint - confirm
+ * the behavior on CPUs that do not implement it.
+ */
+static inline void
+dlb2_cldemote(void *addr)
+{
+	/* Load addr into RSI, then demote the cache line of the address
+	 * contained in that register.
+	 */
+	asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (addr));
+}
+
+/* Execute a hand-encoded MOVDIR64B instruction (raw bytes 66 0f 38 f8 02):
+ * a 64-byte direct store of the data at qe4 to pp_addr.
+ *
+ * qe4:     source buffer; its address is passed in the DX register ("d").
+ * pp_addr: destination; its address is passed in the AX register ("a").
+ *
+ * The instruction reads *qe4 and writes *pp_addr, but neither is expressed
+ * as an asm memory operand. The "memory" clobber is therefore required: it
+ * makes the compiler complete any pending stores to the source buffer
+ * before the instruction executes, and stops it from caching memory values
+ * across the instruction.
+ * NOTE(review): per the Intel SDM the destination must be 64-byte aligned -
+ * confirm with the callers.
+ */
+static inline void
+dlb2_movdir64b(void *qe4, void *pp_addr)
+{
+	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+		     :
+		     : "a" (pp_addr), "d" (qe4)
+		     : "memory");
+}
+
+#endif /* _DLB2_INLINE_FNS_H_ */