[v2,3/4] service: use multi-word bitset to represent service flags

Message ID: 20241010083022.3437380-4-david.marchand@redhat.com (mailing list archive)
State: Accepted
Delegated to: David Marchand
Series: Add bitset type

Checks

Context        Check     Description
ci/checkpatch  success   coding style OK

Commit Message

David Marchand Oct. 10, 2024, 8:30 a.m. UTC
From: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

Use a multi-word bitset to track which services are mapped to which
lcores, allowing the RTE_SERVICE_NUM_MAX compile-time constant to be >
64.

Replace the array-of-bytes service-currently-active flags with a more
compact multi-word bitset representation, somewhat reducing the memory
footprint.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/rte_service.c | 70 ++++++++++++++----------------------
 1 file changed, 27 insertions(+), 43 deletions(-)
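
For context, the sketch below shows how the rte_bitset API used in this
patch replaces the old single-word mask handling; it assumes the
rte_bitset.h header added earlier in this series. The EXAMPLE_* names,
struct example_core_state and the example_*() helpers are illustrative
only and not part of the patch; the RTE_BITSET_* macros and
rte_bitset_*() calls are the ones appearing in the diff.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

#include <rte_bitset.h>

/* Illustrative stand-in for RTE_SERVICE_NUM_MAX; with the bitset it
 * may now exceed 64.
 */
#define EXAMPLE_NUM_SERVICES_MAX 128

/* Mirrors the two fields reworked by this patch: one bit per service
 * instead of a uint64_t mask and an array of bytes.
 */
struct example_core_state {
	RTE_BITSET_DECLARE(mapped_services, EXAMPLE_NUM_SERVICES_MAX);
	RTE_BITSET_DECLARE(service_active_on_lcore, EXAMPLE_NUM_SERVICES_MAX);
};

static void
example_map_service(struct example_core_state *cs, uint32_t sid)
{
	/* replaces: cs->service_mask |= UINT64_C(1) << sid */
	rte_bitset_set(cs->mapped_services, sid);
}

static bool
example_service_is_mapped(const struct example_core_state *cs, uint32_t sid)
{
	/* replaces: cs->service_mask & (UINT64_C(1) << sid) */
	return rte_bitset_test(cs->mapped_services, sid);
}

static bool
example_service_may_be_active(const struct example_core_state *cs, uint32_t sid)
{
	/* replaces: cs->service_active_on_lcore[sid] != 0 */
	return rte_bitset_test(cs->service_active_on_lcore, sid);
}

static void
example_run_mapped_services(struct example_core_state *cs)
{
	ssize_t sid;

	/* replaces the rte_ctz64()/rte_clz64() bounds computation and
	 * the per-bit loop over the 64-bit mask
	 */
	RTE_BITSET_FOREACH_SET(sid, cs->mapped_services, EXAMPLE_NUM_SERVICES_MAX) {
		printf("would run service %zd\n", sid);
	}
}

int
main(void)
{
	struct example_core_state cs;

	/* a bitset declared on the stack is not zero-initialized */
	rte_bitset_clear_all(cs.mapped_services, EXAMPLE_NUM_SERVICES_MAX);
	rte_bitset_clear_all(cs.service_active_on_lcore, EXAMPLE_NUM_SERVICES_MAX);

	example_map_service(&cs, 70); /* a service id above 64 now works */

	printf("service 70 mapped: %d\n", example_service_is_mapped(&cs, 70));
	printf("service 70 active: %d\n", example_service_may_be_active(&cs, 70));
	printf("mapped services: %zu\n",
	       (size_t)rte_bitset_count_set(cs.mapped_services, EXAMPLE_NUM_SERVICES_MAX));

	example_run_mapped_services(&cs);

	return 0;
}

Note that the bitset is a plain array of words, so the
rte_bitset_clear_all() calls are needed before first use of a
stack-declared bitset; in the patch the per-lcore state lives in
zero-initialized static storage, which is why the service code only
clears it on reset paths.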
  

Patch

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index a38c594ce4..324471e897 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@ 
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_bitset.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -65,11 +66,11 @@  struct service_stats {
 /* the internal values of a service core */
 struct __rte_cache_aligned core_state {
 	/* map of services IDs are run on this core */
-	uint64_t service_mask;
+	RTE_BITSET_DECLARE(mapped_services, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
 	RTE_ATOMIC(uint8_t) thread_active; /* indicates when thread is in service_run() */
 	uint8_t is_service_core; /* set if core is currently a service core */
-	uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+	RTE_BITSET_DECLARE(service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 	RTE_ATOMIC(uint64_t) loops;
 	RTE_ATOMIC(uint64_t) cycles;
 	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
@@ -83,11 +84,6 @@  static uint32_t rte_service_library_initialized;
 int32_t
 rte_service_init(void)
 {
-	/* Hard limit due to the use of an uint64_t-based bitmask (and the
-	 * clzl intrinsic).
-	 */
-	RTE_BUILD_BUG_ON(RTE_SERVICE_NUM_MAX > 64);
-
 	if (rte_service_library_initialized) {
 		EAL_LOG(NOTICE,
 			"service library init() called, init flag %d",
@@ -298,7 +294,7 @@  rte_service_component_unregister(uint32_t id)
 
 	/* clear the run-bit in all cores */
 	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+		rte_bitset_clear(lcore_states[i].mapped_services, id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -423,7 +419,7 @@  service_runner_do_callback(struct rte_service_spec_impl *s,
 
 /* Expects the service 's' is valid. */
 static int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+service_run(uint32_t i, struct core_state *cs, const uint64_t *mapped_services,
 	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
@@ -437,12 +433,12 @@  service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 			RUNSTATE_RUNNING ||
 	    rte_atomic_load_explicit(&s->app_runstate, rte_memory_order_acquire) !=
 			RUNSTATE_RUNNING ||
-	    !(service_mask & (UINT64_C(1) << i))) {
-		cs->service_active_on_lcore[i] = 0;
+	    !rte_bitset_test(mapped_services, i)) {
+		rte_bitset_clear(cs->service_active_on_lcore, i);
 		return -ENOEXEC;
 	}
 
-	cs->service_active_on_lcore[i] = 1;
+	rte_bitset_set(cs->service_active_on_lcore, i);
 
 	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_spinlock_trylock(&s->execute_lock))
@@ -467,7 +463,7 @@  rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		if (rte_bitset_test(lcore_states[ids[i]].service_active_on_lcore, id))
 			return 1;
 	}
 
@@ -487,7 +483,9 @@  rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 	 */
 	rte_atomic_fetch_add_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
-	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+	RTE_BITSET_DECLARE(all_services, RTE_SERVICE_NUM_MAX);
+	rte_bitset_set_all(all_services, RTE_SERVICE_NUM_MAX);
+	int ret = service_run(id, cs, all_services, s, serialize_mt_unsafe);
 
 	rte_atomic_fetch_sub_explicit(&s->num_mapped_cores, 1, rte_memory_order_relaxed);
 
@@ -498,7 +496,6 @@  static int32_t
 service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
-	uint8_t i;
 	const int lcore = rte_lcore_id();
 	struct core_state *cs = &lcore_states[lcore];
 
@@ -510,20 +507,11 @@  service_runner_func(void *arg)
 	 */
 	while (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_RUNNING) {
+		ssize_t id;
 
-		const uint64_t service_mask = cs->service_mask;
-		uint8_t start_id;
-		uint8_t end_id;
-
-		if (service_mask == 0)
-			continue;
-
-		start_id = rte_ctz64(service_mask);
-		end_id = 64 - rte_clz64(service_mask);
-
-		for (i = start_id; i < end_id; i++) {
+		RTE_BITSET_FOREACH_SET(id, cs->mapped_services, RTE_SERVICE_NUM_MAX) {
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i), 1);
+			service_run(id, cs, cs->mapped_services, service_get(id), 1);
 		}
 
 		rte_atomic_store_explicit(&cs->loops, cs->loops + 1, rte_memory_order_relaxed);
@@ -532,8 +520,7 @@  service_runner_func(void *arg)
 	/* Switch off this core for all services, to ensure that future
 	 * calls to may_be_active() know this core is switched off.
 	 */
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
-		cs->service_active_on_lcore[i] = 0;
+	rte_bitset_clear_all(cs->service_active_on_lcore, RTE_SERVICE_NUM_MAX);
 
 	/* Use SEQ CST memory ordering to avoid any re-ordering around
 	 * this store, ensuring that once this store is visible, the service
@@ -599,7 +586,7 @@  rte_service_lcore_count_services(uint32_t lcore)
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
-	return rte_popcount64(cs->service_mask);
+	return rte_bitset_count_set(cs->mapped_services, RTE_SERVICE_NUM_MAX);
 }
 
 int32_t
@@ -652,25 +639,23 @@  service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 			!lcore_states[lcore].is_service_core)
 		return -EINVAL;
 
-	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			rte_bitset_set(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			rte_bitset_clear(lcore_states[lcore].mapped_services, sid);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = rte_bitset_test(lcore_states[lcore].mapped_services, sid);
 
 	return 0;
 }
@@ -712,11 +697,11 @@  set_lcore_state(uint32_t lcore, int32_t state)
 int32_t
 rte_service_lcore_reset_all(void)
 {
-	/* loop over cores, reset all to mask 0 */
+	/* loop over cores, reset all mapped services */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+			rte_bitset_clear_all(lcore_states[i].mapped_services, RTE_SERVICE_NUM_MAX);
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
@@ -744,7 +729,7 @@  rte_service_lcore_add(uint32_t lcore)
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	rte_bitset_clear_all(lcore_states[lcore].mapped_services, RTE_SERVICE_NUM_MAX);
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
@@ -827,12 +812,11 @@  rte_service_lcore_stop(uint32_t lcore)
 
 	uint32_t i;
 	struct core_state *cs = &lcore_states[lcore];
-	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		int32_t enabled = service_mask & (UINT64_C(1) << i);
-		int32_t service_running = rte_service_runstate_get(i);
-		int32_t only_core = (1 ==
+		bool enabled = rte_bitset_test(cs->mapped_services, i);
+		bool service_running = rte_service_runstate_get(i);
+		bool only_core = (1 ==
 			rte_atomic_load_explicit(&rte_services[i].num_mapped_cores,
 				rte_memory_order_relaxed));