[RFC,v3,5/6] service: keep per-lcore state in lcore variable

Message ID 20240220084908.488252-6-mattias.ronnblom@ericsson.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Lcore variables |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Mattias Rönnblom Feb. 20, 2024, 8:49 a.m. UTC
  Replace static array of cache-aligned structs with an lcore variable,
to slightly benefit code simplicity and performance.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_service.c | 119 ++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 51 deletions(-)
  

Comments

Morten Brørup Feb. 22, 2024, 9:42 a.m. UTC | #1
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Tuesday, 20 February 2024 09.49
> 
> Replace static array of cache-aligned structs with an lcore variable,
> to slightly benefit code simplicity and performance.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---


> @@ -486,8 +489,7 @@ service_runner_func(void *arg)
>  {
>  	RTE_SET_USED(arg);
>  	uint8_t i;
> -	const int lcore = rte_lcore_id();
> -	struct core_state *cs = &lcore_states[lcore];
> +	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);

Typo: TAB -> SPACE.

> 
>  	rte_atomic_store_explicit(&cs->thread_active, 1,
> rte_memory_order_seq_cst);
> 
> @@ -533,13 +535,16 @@ service_runner_func(void *arg)
>  int32_t
>  rte_service_lcore_may_be_active(uint32_t lcore)
>  {
> -	if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
> +	struct core_state *cs =
> +		RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
> +
> +	if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)
>  		return -EINVAL;

This comment is mostly related to patch 1 in the series...

You are setting cs = RTE_LCORE_VAR_LCORE_PTR(lcore, ...) before validating that lcore < RTE_MAX_LCORE. I wondered if that potentially was an overrun bug.

It is obvious when looking at the RTE_LCORE_VAR_LCORE_PTR() macro implementation, but perhaps its description could mention that it is safe to use with an "invalid" lcore_id, but not dereferencing it.
  
Mattias Rönnblom Feb. 23, 2024, 10:19 a.m. UTC | #2
On 2024-02-22 10:42, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Tuesday, 20 February 2024 09.49
>>
>> Replace static array of cache-aligned structs with an lcore variable,
>> to slightly benefit code simplicity and performance.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> ---
> 
> 
>> @@ -486,8 +489,7 @@ service_runner_func(void *arg)
>>   {
>>   	RTE_SET_USED(arg);
>>   	uint8_t i;
>> -	const int lcore = rte_lcore_id();
>> -	struct core_state *cs = &lcore_states[lcore];
>> +	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);
> 
> Typo: TAB -> SPACE.
> 

Will fix.

>>
>>   	rte_atomic_store_explicit(&cs->thread_active, 1,
>> rte_memory_order_seq_cst);
>>
>> @@ -533,13 +535,16 @@ service_runner_func(void *arg)
>>   int32_t
>>   rte_service_lcore_may_be_active(uint32_t lcore)
>>   {
>> -	if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
>> +	struct core_state *cs =
>> +		RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
>> +
>> +	if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)
>>   		return -EINVAL;
> 
> This comment is mostly related to patch 1 in the series...
> 
> You are setting cs = RTE_LCORE_VAR_LCORE_PTR(lcore, ...) before validating that lcore < RTE_MAX_LCORE. I wondered if that potentially was an overrun bug.
> 
> It is obvious when looking at the RTE_LCORE_VAR_LCORE_PTR() macro implementation, but perhaps its description could mention that it is safe to use with an "invalid" lcore_id, but not dereferencing it.
> 

I thought about adding something equivalent to an RTE_ASSERT() on 
lcore_id in the dereferencing macros, but then I thought that maybe it 
is a valid use case to pass invalid lcore ids.

Invalid ids being OK or not, I think the above code should do "cs = 
/../" *after* the lcore id check. Now it looks strange and force the 
reader to consider if this is valid or not, for no good reason.

The lcore variable API docs should probably explicitly allow invalid 
core id in the macros.
  

Patch

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d959c91459..de205c5da5 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@ 
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_lcore_var.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -75,7 +76,7 @@  struct core_state {
 
 static uint32_t rte_service_count;
 static struct rte_service_spec_impl *rte_services;
-static struct core_state *lcore_states;
+static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states);
 static uint32_t rte_service_library_initialized;
 
 int32_t
@@ -101,11 +102,12 @@  rte_service_init(void)
 		goto fail_mem;
 	}
 
-	lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
-			sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
-	if (!lcore_states) {
-		EAL_LOG(ERR, "error allocating core states array");
-		goto fail_mem;
+	if (lcore_states == NULL)
+		RTE_LCORE_VAR_ALLOC(lcore_states);
+	else {
+		struct core_state *cs;
+		RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+			memset(cs, 0, sizeof(struct core_state));
 	}
 
 	int i;
@@ -122,7 +124,6 @@  rte_service_init(void)
 	return 0;
 fail_mem:
 	rte_free(rte_services);
-	rte_free(lcore_states);
 	return -ENOMEM;
 }
 
@@ -136,7 +137,6 @@  rte_service_finalize(void)
 	rte_eal_mp_wait_lcore();
 
 	rte_free(rte_services);
-	rte_free(lcore_states);
 
 	rte_service_library_initialized = 0;
 }
@@ -286,7 +286,6 @@  rte_service_component_register(const struct rte_service_spec *spec,
 int32_t
 rte_service_component_unregister(uint32_t id)
 {
-	uint32_t i;
 	struct rte_service_spec_impl *s;
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
@@ -294,9 +293,10 @@  rte_service_component_unregister(uint32_t id)
 
 	s->internal_flags &= ~(SERVICE_F_REGISTERED);
 
+	struct core_state *cs;
 	/* clear the run-bit in all cores */
-	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+	RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+		cs->service_mask &= ~(UINT64_C(1) << id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -454,7 +454,10 @@  rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states);
+
+		if (cs->service_active_on_lcore[id])
 			return 1;
 	}
 
@@ -464,7 +467,7 @@  rte_service_may_be_active(uint32_t id)
 int32_t
 rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 {
-	struct core_state *cs = &lcore_states[rte_lcore_id()];
+	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);
 	struct rte_service_spec_impl *s;
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
@@ -486,8 +489,7 @@  service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint8_t i;
-	const int lcore = rte_lcore_id();
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);
 
 	rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst);
 
@@ -533,13 +535,16 @@  service_runner_func(void *arg)
 int32_t
 rte_service_lcore_may_be_active(uint32_t lcore)
 {
-	if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
+	struct core_state *cs =
+		RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
+	if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)
 		return -EINVAL;
 
 	/* Load thread_active using ACQUIRE to avoid instructions dependent on
 	 * the result being re-ordered before this load completes.
 	 */
-	return rte_atomic_load_explicit(&lcore_states[lcore].thread_active,
+	return rte_atomic_load_explicit(&cs->thread_active,
 			       rte_memory_order_acquire);
 }
 
@@ -547,9 +552,11 @@  int32_t
 rte_service_lcore_count(void)
 {
 	int32_t count = 0;
-	uint32_t i;
-	for (i = 0; i < RTE_MAX_LCORE; i++)
-		count += lcore_states[i].is_service_core;
+
+	struct core_state *cs;
+	RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+		count += cs->is_service_core;
+
 	return count;
 }
 
@@ -566,7 +573,8 @@  rte_service_lcore_list(uint32_t array[], uint32_t n)
 	uint32_t i;
 	uint32_t idx = 0;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		struct core_state *cs = &lcore_states[i];
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(i, lcore_states);
 		if (cs->is_service_core) {
 			array[idx] = i;
 			idx++;
@@ -582,7 +590,7 @@  rte_service_lcore_count_services(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -634,30 +642,31 @@  rte_service_start_with_defaults(void)
 static int32_t
 service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 {
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
 	/* validate ID, or return error value */
 	if (!service_valid(sid) || lcore >= RTE_MAX_LCORE ||
-			!lcore_states[lcore].is_service_core)
+			!cs->is_service_core)
 		return -EINVAL;
 
 	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = cs->service_mask & sid_mask;
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			cs->service_mask |= sid_mask;
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			cs->service_mask &= ~(sid_mask);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = !!(cs->service_mask & (sid_mask));
 
 	return 0;
 }
@@ -685,13 +694,14 @@  set_lcore_state(uint32_t lcore, int32_t state)
 {
 	/* mark core state in hugepage backed config */
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	cfg->lcore_role[lcore] = state;
 
 	/* mark state in process local lcore_config */
 	lcore_config[lcore].core_role = state;
 
 	/* update per-lcore optimized state tracking */
-	lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
+	cs->is_service_core = (state == ROLE_SERVICE);
 
 	rte_eal_trace_service_lcore_state_change(lcore, state);
 }
@@ -702,14 +712,16 @@  rte_service_lcore_reset_all(void)
 	/* loop over cores, reset all to mask 0 */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(i, lcore_states);
+		if (cs->is_service_core) {
+			cs->service_mask = 0;
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
 			 * with load-acquire in runstate read functions.
 			 */
-			rte_atomic_store_explicit(&lcore_states[i].runstate,
+			rte_atomic_store_explicit(&cs->runstate,
 				RUNSTATE_STOPPED, rte_memory_order_release);
 		}
 	}
@@ -725,17 +737,19 @@  rte_service_lcore_add(uint32_t lcore)
 {
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
-	if (lcore_states[lcore].is_service_core)
+
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+	if (cs->is_service_core)
 		return -EALREADY;
 
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	cs->service_mask = 0;
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
-	rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+	rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED,
 		rte_memory_order_release);
 
 	return rte_eal_wait_lcore(lcore);
@@ -747,7 +761,7 @@  rte_service_lcore_del(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -EINVAL;
 
@@ -771,7 +785,7 @@  rte_service_lcore_start(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -EINVAL;
 
@@ -801,6 +815,8 @@  rte_service_lcore_start(uint32_t lcore)
 int32_t
 rte_service_lcore_stop(uint32_t lcore)
 {
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
@@ -808,12 +824,11 @@  rte_service_lcore_stop(uint32_t lcore)
 	 * memory order here to synchronize with store-release
 	 * in runstate update functions.
 	 */
-	if (rte_atomic_load_explicit(&lcore_states[lcore].runstate, rte_memory_order_acquire) ==
+	if (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_STOPPED)
 		return -EALREADY;
 
 	uint32_t i;
-	struct core_state *cs = &lcore_states[lcore];
 	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
@@ -834,7 +849,7 @@  rte_service_lcore_stop(uint32_t lcore)
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
-	rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+	rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED,
 		rte_memory_order_release);
 
 	rte_eal_trace_service_lcore_stop(lcore);
@@ -845,7 +860,7 @@  rte_service_lcore_stop(uint32_t lcore)
 static uint64_t
 lcore_attr_get_loops(unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->loops, rte_memory_order_relaxed);
 }
@@ -853,7 +868,7 @@  lcore_attr_get_loops(unsigned int lcore)
 static uint64_t
 lcore_attr_get_cycles(unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->cycles, rte_memory_order_relaxed);
 }
@@ -861,7 +876,7 @@  lcore_attr_get_cycles(unsigned int lcore)
 static uint64_t
 lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->service_stats[service_id].calls,
 		rte_memory_order_relaxed);
@@ -870,7 +885,7 @@  lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore)
 static uint64_t
 lcore_attr_get_service_cycles(uint32_t service_id, unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->service_stats[service_id].cycles,
 		rte_memory_order_relaxed);
@@ -886,7 +901,10 @@  attr_get(uint32_t id, lcore_attr_get_fun lcore_attr_get)
 	uint64_t sum = 0;
 
 	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
-		if (lcore_states[lcore].is_service_core)
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
+		if (cs->is_service_core)
 			sum += lcore_attr_get(id, lcore);
 	}
 
@@ -930,12 +948,11 @@  int32_t
 rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 			   uint64_t *attr_value)
 {
-	struct core_state *cs;
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	if (lcore >= RTE_MAX_LCORE || !attr_value)
 		return -EINVAL;
 
-	cs = &lcore_states[lcore];
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -960,7 +977,8 @@  rte_service_attr_reset_all(uint32_t id)
 		return -EINVAL;
 
 	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
-		struct core_state *cs = &lcore_states[lcore];
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 		cs->service_stats[id] = (struct service_stats) {};
 	}
@@ -971,12 +989,11 @@  rte_service_attr_reset_all(uint32_t id)
 int32_t
 rte_service_lcore_attr_reset_all(uint32_t lcore)
 {
-	struct core_state *cs;
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	cs = &lcore_states[lcore];
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -1011,7 +1028,7 @@  static void
 service_dump_calls_per_lcore(FILE *f, uint32_t lcore)
 {
 	uint32_t i;
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	fprintf(f, "%02d\t", lcore);
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {