[v4,3/4] eal: add Arm WFET in power management intrinsics

Message ID 20240726171528.2245682-3-wathsala.vithanage@arm.com (mailing list archive)
State New
Delegated to: Thomas Monjalon
Headers
Series [v4,1/4] eal: expand the availability of WFE and related instructions |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Wathsala Wathawana Vithanage July 26, 2024, 5:15 p.m. UTC
Wait for event with timeout (WFET) puts the CPU in a low-power
mode, where it stays until an event is signalled (SEV), an
exclusive monitor is lost, or a timeout occurs.
WFET is enabled selectively by checking FEAT_WFxT in the Linux
auxiliary vector. If FEAT_WFxT is not available, power management
will fall back to WFE.
WFE is available on all the Arm platforms supported by DPDK.
Therefore, the RTE_ARM_USE_WFE macro is not required to enable
the WFE feature for PMD power monitoring. 
RTE_ARM_USE_WFE is used at build time to select the WFE instruction
where applicable in the code, at the developer's discretion, rather
than as an indicator of the instruction's availability.

Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
Reviewed-by: Dhruv Tripathi <dhruv.tripathi@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>
Reviewed-by: Nick Connolly <nick.connolly@arm.com>
Reviewed-by: Vinod Krishna <vinod.krishna@arm.com>

---
 .mailmap                              |  1 +
 app/test/test_cpuflags.c              |  3 +++
 lib/eal/arm/include/rte_cpuflags_64.h |  3 +++
 lib/eal/arm/include/rte_pause_64.h    | 16 +++++++++--
 lib/eal/arm/rte_cpuflags.c            |  1 +
 lib/eal/arm/rte_power_intrinsics.c    | 39 ++++++++++++++++++---------
 6 files changed, 49 insertions(+), 14 deletions(-)
  

Patch

diff --git a/.mailmap b/.mailmap
index 9c28b74655..a5c49d3702 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1540,6 +1540,7 @@  Vincent Li <vincent.mc.li@gmail.com>
 Vincent S. Cojot <vcojot@redhat.com>
 Vinh Tran <vinh.t.tran10@gmail.com>
 Vipin Padmam Ramesh <vipinp@vmware.com>
+Vinod Krishna <vinod.krishna@arm.com>
 Vipin Varghese <vipin.varghese@amd.com> <vipin.varghese@intel.com>
 Vipul Ashri <vipul.ashri@oracle.com>
 Visa Hankala <visa@hankala.org>
diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c
index a0ff74720c..22ab4dff0a 100644
--- a/app/test/test_cpuflags.c
+++ b/app/test/test_cpuflags.c
@@ -156,6 +156,9 @@  test_cpuflags(void)
 
 	printf("Check for SVEBF16:\t");
 	CHECK_FOR_FLAG(RTE_CPUFLAG_SVEBF16);
+
+	printf("Check for WFXT:\t");
+	CHECK_FOR_FLAG(RTE_CPUFLAG_WFXT);
 #endif
 
 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h
index afe70209c3..993d980a02 100644
--- a/lib/eal/arm/include/rte_cpuflags_64.h
+++ b/lib/eal/arm/include/rte_cpuflags_64.h
@@ -36,6 +36,9 @@  enum rte_cpu_flag_t {
 	RTE_CPUFLAG_SVEF64MM,
 	RTE_CPUFLAG_SVEBF16,
 	RTE_CPUFLAG_AARCH64,
+
+	/* WFET and WFIT instructions */
+	RTE_CPUFLAG_WFXT,
 };
 
 #include "generic/rte_cpuflags.h"
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 8224f09ba7..809403bffa 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -24,15 +24,27 @@  static inline void rte_pause(void)
 	asm volatile("yield" ::: "memory");
 }
 
-/* Send a local event to quit WFE. */
+/* Send a local event to quit WFE/WFxT. */
 #define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); }
 
-/* Send a global event to quit WFE for all cores. */
+/* Send a global event to quit WFE/WFxT for all cores. */
 #define __RTE_ARM_SEV() { asm volatile("sev" : : : "memory"); }
 
 /* Put processor into low power WFE(Wait For Event) state. */
 #define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); }
 
+/* Put processor into low power WFET (WFE with Timeout) state. */
+#ifdef RTE_ARM_FEATURE_WFXT
+#define __RTE_ARM_WFET(t) {                              \
+	asm volatile("wfet %x[to]"                        \
+			:                                 \
+			: [to] "r" (t)                    \
+			: "memory");                      \
+	}
+#else
+#define __RTE_ARM_WFET(t) { RTE_SET_USED(t); }
+#endif
+
 /*
  * Atomic exclusive load from addr, it returns the 8-bit content of
  * *addr while making it 'monitored', when it is written by someone
diff --git a/lib/eal/arm/rte_cpuflags.c b/lib/eal/arm/rte_cpuflags.c
index 29884c285f..88e10c6da0 100644
--- a/lib/eal/arm/rte_cpuflags.c
+++ b/lib/eal/arm/rte_cpuflags.c
@@ -115,6 +115,7 @@  const struct feature_entry rte_cpu_feature_table[] = {
 	FEAT_DEF(SVEF32MM,	REG_HWCAP2,   10)
 	FEAT_DEF(SVEF64MM,	REG_HWCAP2,   11)
 	FEAT_DEF(SVEBF16,	REG_HWCAP2,   12)
+	FEAT_DEF(WFXT,		REG_HWCAP2,   31)
 	FEAT_DEF(AARCH64,	REG_PLATFORM,  0)
 };
 #endif /* RTE_ARCH */
diff --git a/lib/eal/arm/rte_power_intrinsics.c b/lib/eal/arm/rte_power_intrinsics.c
index b0056cce8b..6475bbca04 100644
--- a/lib/eal/arm/rte_power_intrinsics.c
+++ b/lib/eal/arm/rte_power_intrinsics.c
@@ -4,19 +4,32 @@ 
 
 #include <errno.h>
 
+#include "rte_cpuflags.h"
 #include "rte_power_intrinsics.h"
 
 /**
- * This function uses WFE instruction to make lcore suspend
+ *  Set wfet_en if WFET is supported
+ */
+#ifdef RTE_ARCH_64
+static uint8_t wfet_en;
+#endif /* RTE_ARCH_64 */
+
+RTE_INIT(rte_power_intrinsics_init)
+{
+#ifdef RTE_ARCH_64
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WFXT))
+		wfet_en = 1;
+#endif /* RTE_ARCH_64 */
+}
+
+/**
+ * This function uses WFE/WFET instruction to make lcore suspend
  * execution on ARM.
- * Note that timestamp based timeout is not supported yet.
  */
 int
 rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 		const uint64_t tsc_timestamp)
 {
-	RTE_SET_USED(tsc_timestamp);
-
 #ifdef RTE_ARCH_64
 	const unsigned int lcore_id = rte_lcore_id();
 	uint64_t cur_value;
@@ -33,28 +46,30 @@  rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 
 	switch (pmc->size) {
 	case sizeof(uint8_t):
-		__RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, rte_memory_order_relaxed)
-		__RTE_ARM_WFE()
+		__RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, rte_memory_order_relaxed);
 		break;
 	case sizeof(uint16_t):
-		__RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, rte_memory_order_relaxed)
-		__RTE_ARM_WFE()
+		__RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, rte_memory_order_relaxed);
 		break;
 	case sizeof(uint32_t):
-		__RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, rte_memory_order_relaxed)
-		__RTE_ARM_WFE()
+		__RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, rte_memory_order_relaxed);
 		break;
 	case sizeof(uint64_t):
-		__RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, rte_memory_order_relaxed)
-		__RTE_ARM_WFE()
+		__RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, rte_memory_order_relaxed);
 		break;
 	default:
 		return -EINVAL; /* unexpected size */
 	}
 
+	if (wfet_en)
+		__RTE_ARM_WFET(tsc_timestamp)
+	else
+		__RTE_ARM_WFE()
+
 	return 0;
 #else
 	RTE_SET_USED(pmc);
+	RTE_SET_USED(tsc_timestamp);
 
 	return -ENOTSUP;
 #endif /* RTE_ARCH_64 */