[RFC,v4,5/6] service: keep per-lcore state in lcore variable

Message ID 20240225150330.517225-6-mattias.ronnblom@ericsson.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series Lcore variables

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Mattias Rönnblom Feb. 25, 2024, 3:03 p.m. UTC
  Replace the static array of cache-aligned structs with an lcore variable,
slightly improving code simplicity and performance.

RFC v4:
 * Remove strange-looking lcore value lookup potentially containing an
   invalid lcore id. (Morten Brørup)
 * Replace misplaced tab with space. (Morten Brørup)

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_service.c | 120 ++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 51 deletions(-)
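
The conversion follows one pattern throughout: the per-lcore array indexed
by lcore id becomes an opaque lcore variable handle, accessed through the
RTE_LCORE_VAR_*() macros introduced earlier in this series. Below is a
minimal sketch of that pattern, not the actual patch; the struct is trimmed
to one field and the helper names (states_init, own_mask, lcore_mask) are
illustrative only:

#include <stdint.h>
#include <string.h>

#include <rte_lcore.h>
#include <rte_lcore_var.h>

struct core_state {
	uint64_t service_mask;
	/* ... */
};

/* Before: static struct core_state *lcore_states; one cache-aligned
 * array slot per possible lcore, allocated with rte_calloc().
 */

/* After: an opaque handle; the per-lcore values are allocated on
 * first use and never freed.
 */
static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states);

static void
states_init(void)
{
	/* Allocate once; on re-initialization, just zero the values. */
	if (lcore_states == NULL)
		RTE_LCORE_VAR_ALLOC(lcore_states);
	else {
		struct core_state *cs;

		RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
			memset(cs, 0, sizeof(struct core_state));
	}
}

static uint64_t
own_mask(void)
{
	/* The calling thread's own value... */
	struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states);

	return cs->service_mask;
}

static uint64_t
lcore_mask(unsigned int lcore)
{
	/* ...or any particular lcore's value. */
	struct core_state *cs =
		RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);

	return cs->service_mask;
}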
  

Comments

Mattias Rönnblom Feb. 25, 2024, 4:28 p.m. UTC | #1
On 2024-02-25 16:03, Mattias Rönnblom wrote:
> Replace the static array of cache-aligned structs with an lcore variable,
> slightly improving code simplicity and performance.
> 
> RFC v4:
>   * Remove strange-looking lcore value lookup potentially containing an
>     invalid lcore id. (Morten Brørup)
>   * Replace misplaced tab with space. (Morten Brørup)
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
>   lib/eal/common/rte_service.c | 120 ++++++++++++++++++++---------------
>   1 file changed, 69 insertions(+), 51 deletions(-)
> 
> diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
> index d959c91459..7fbae704ed 100644
> --- a/lib/eal/common/rte_service.c
> +++ b/lib/eal/common/rte_service.c
> @@ -11,6 +11,7 @@
>   
>   #include <eal_trace_internal.h>
>   #include <rte_lcore.h>
> +#include <rte_lcore_var.h>
>   #include <rte_branch_prediction.h>
>   #include <rte_common.h>
>   #include <rte_cycles.h>
> @@ -75,7 +76,7 @@ struct core_state {
>   
>   static uint32_t rte_service_count;
>   static struct rte_service_spec_impl *rte_services;
> -static struct core_state *lcore_states;
> +static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states);
>   static uint32_t rte_service_library_initialized;
>   
>   int32_t
> @@ -101,11 +102,12 @@ rte_service_init(void)
>   		goto fail_mem;
>   	}
>   
> -	lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
> -			sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
> -	if (!lcore_states) {
> -		EAL_LOG(ERR, "error allocating core states array");
> -		goto fail_mem;
> +	if (lcore_states == NULL)
> +		RTE_LCORE_VAR_ALLOC(lcore_states);
> +	else {
> +		struct core_state *cs;
> +		RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
> +			memset(cs, 0, sizeof(struct core_state));
>   	}
>   
>   	int i;
> @@ -122,7 +124,6 @@ rte_service_init(void)
>   	return 0;
>   fail_mem:
>   	rte_free(rte_services);
> -	rte_free(lcore_states);
>   	return -ENOMEM;
>   }
>   
> @@ -136,7 +137,6 @@ rte_service_finalize(void)
>   	rte_eal_mp_wait_lcore();
>   
>   	rte_free(rte_services);
> -	rte_free(lcore_states);
>   
>   	rte_service_library_initialized = 0;
>   }
> @@ -286,7 +286,6 @@ rte_service_component_register(const struct rte_service_spec *spec,
>   int32_t
>   rte_service_component_unregister(uint32_t id)
>   {
> -	uint32_t i;
>   	struct rte_service_spec_impl *s;
>   	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
>   
> @@ -294,9 +293,10 @@ rte_service_component_unregister(uint32_t id)
>   
>   	s->internal_flags &= ~(SERVICE_F_REGISTERED);
>   
> +	struct core_state *cs;
>   	/* clear the run-bit in all cores */
> -	for (i = 0; i < RTE_MAX_LCORE; i++)
> -		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
> +	RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
> +		cs->service_mask &= ~(UINT64_C(1) << id);
>   
>   	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
>   
> @@ -454,7 +454,10 @@ rte_service_may_be_active(uint32_t id)
>   		return -EINVAL;
>   
>   	for (i = 0; i < lcore_count; i++) {
> -		if (lcore_states[ids[i]].service_active_on_lcore[id])
> +		struct core_state *cs =
> +			RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states);
> +
> +		if (cs->service_active_on_lcore[id])
>   			return 1;
>   	}
>   
> @@ -464,7 +467,7 @@ rte_service_may_be_active(uint32_t id)
>   int32_t
>   rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
>   {
> -	struct core_state *cs = &lcore_states[rte_lcore_id()];
> +	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);
>   	struct rte_service_spec_impl *s;
>   
>   	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> @@ -486,8 +489,7 @@ service_runner_func(void *arg)
>   {
>   	RTE_SET_USED(arg);
>   	uint8_t i;
> -	const int lcore = rte_lcore_id();
> -	struct core_state *cs = &lcore_states[lcore];
> +	struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states);
>   
>   	rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst);
>   
> @@ -533,13 +535,17 @@ service_runner_func(void *arg)
>   int32_t
>   rte_service_lcore_may_be_active(uint32_t lcore)
>   {
> -	if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
> +	struct core_state *cs;
> +
> +	if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)

This doesn't work, since 'cs' is not yet initialized. I'll fix it in v5.

<snip>
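
For reference, one way the check could be reordered (an assumed sketch of
the v5 shape, not the posted fix) is to validate the lcore id before
fetching the per-lcore pointer:

int32_t
rte_service_lcore_may_be_active(uint32_t lcore)
{
	struct core_state *cs;

	if (lcore >= RTE_MAX_LCORE)
		return -EINVAL;

	/* Fetch the per-lcore pointer only once the id is known valid. */
	cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);

	if (!cs->is_service_core)
		return -EINVAL;

	/* Load thread_active using ACQUIRE to avoid instructions
	 * dependent on the result being re-ordered before this load
	 * completes.
	 */
	return rte_atomic_load_explicit(&cs->thread_active,
			rte_memory_order_acquire);
}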
  

Patch

diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d959c91459..7fbae704ed 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -11,6 +11,7 @@ 
 
 #include <eal_trace_internal.h>
 #include <rte_lcore.h>
+#include <rte_lcore_var.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
 #include <rte_cycles.h>
@@ -75,7 +76,7 @@  struct core_state {
 
 static uint32_t rte_service_count;
 static struct rte_service_spec_impl *rte_services;
-static struct core_state *lcore_states;
+static RTE_LCORE_VAR_HANDLE(struct core_state, lcore_states);
 static uint32_t rte_service_library_initialized;
 
 int32_t
@@ -101,11 +102,12 @@  rte_service_init(void)
 		goto fail_mem;
 	}
 
-	lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
-			sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
-	if (!lcore_states) {
-		EAL_LOG(ERR, "error allocating core states array");
-		goto fail_mem;
+	if (lcore_states == NULL)
+		RTE_LCORE_VAR_ALLOC(lcore_states);
+	else {
+		struct core_state *cs;
+		RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+			memset(cs, 0, sizeof(struct core_state));
 	}
 
 	int i;
@@ -122,7 +124,6 @@  rte_service_init(void)
 	return 0;
 fail_mem:
 	rte_free(rte_services);
-	rte_free(lcore_states);
 	return -ENOMEM;
 }
 
@@ -136,7 +137,6 @@  rte_service_finalize(void)
 	rte_eal_mp_wait_lcore();
 
 	rte_free(rte_services);
-	rte_free(lcore_states);
 
 	rte_service_library_initialized = 0;
 }
@@ -286,7 +286,6 @@  rte_service_component_register(const struct rte_service_spec *spec,
 int32_t
 rte_service_component_unregister(uint32_t id)
 {
-	uint32_t i;
 	struct rte_service_spec_impl *s;
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
@@ -294,9 +293,10 @@  rte_service_component_unregister(uint32_t id)
 
 	s->internal_flags &= ~(SERVICE_F_REGISTERED);
 
+	struct core_state *cs;
 	/* clear the run-bit in all cores */
-	for (i = 0; i < RTE_MAX_LCORE; i++)
-		lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+	RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+		cs->service_mask &= ~(UINT64_C(1) << id);
 
 	memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
 
@@ -454,7 +454,10 @@  rte_service_may_be_active(uint32_t id)
 		return -EINVAL;
 
 	for (i = 0; i < lcore_count; i++) {
-		if (lcore_states[ids[i]].service_active_on_lcore[id])
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(ids[i], lcore_states);
+
+		if (cs->service_active_on_lcore[id])
 			return 1;
 	}
 
@@ -464,7 +467,7 @@  rte_service_may_be_active(uint32_t id)
 int32_t
 rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 {
-	struct core_state *cs = &lcore_states[rte_lcore_id()];
+	struct core_state *cs =	RTE_LCORE_VAR_PTR(lcore_states);
 	struct rte_service_spec_impl *s;
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
@@ -486,8 +489,7 @@  service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint8_t i;
-	const int lcore = rte_lcore_id();
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs = RTE_LCORE_VAR_PTR(lcore_states);
 
 	rte_atomic_store_explicit(&cs->thread_active, 1, rte_memory_order_seq_cst);
 
@@ -533,13 +535,17 @@  service_runner_func(void *arg)
 int32_t
 rte_service_lcore_may_be_active(uint32_t lcore)
 {
-	if (lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
+	struct core_state *cs;
+
+	if (lcore >= RTE_MAX_LCORE || !cs->is_service_core)
 		return -EINVAL;
 
+	cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
 	/* Load thread_active using ACQUIRE to avoid instructions dependent on
 	 * the result being re-ordered before this load completes.
 	 */
-	return rte_atomic_load_explicit(&lcore_states[lcore].thread_active,
+	return rte_atomic_load_explicit(&cs->thread_active,
 			       rte_memory_order_acquire);
 }
 
@@ -547,9 +553,11 @@  int32_t
 rte_service_lcore_count(void)
 {
 	int32_t count = 0;
-	uint32_t i;
-	for (i = 0; i < RTE_MAX_LCORE; i++)
-		count += lcore_states[i].is_service_core;
+
+	struct core_state *cs;
+	RTE_LCORE_VAR_FOREACH_VALUE(cs, lcore_states)
+		count += cs->is_service_core;
+
 	return count;
 }
 
@@ -566,7 +574,8 @@  rte_service_lcore_list(uint32_t array[], uint32_t n)
 	uint32_t i;
 	uint32_t idx = 0;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		struct core_state *cs = &lcore_states[i];
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(i, lcore_states);
 		if (cs->is_service_core) {
 			array[idx] = i;
 			idx++;
@@ -582,7 +591,7 @@  rte_service_lcore_count_services(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs = RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -634,30 +643,31 @@  rte_service_start_with_defaults(void)
 static int32_t
 service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
 {
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
 	/* validate ID, or return error value */
 	if (!service_valid(sid) || lcore >= RTE_MAX_LCORE ||
-			!lcore_states[lcore].is_service_core)
+			!cs->is_service_core)
 		return -EINVAL;
 
 	uint64_t sid_mask = UINT64_C(1) << sid;
 	if (set) {
-		uint64_t lcore_mapped = lcore_states[lcore].service_mask &
-			sid_mask;
+		uint64_t lcore_mapped = cs->service_mask & sid_mask;
 
 		if (*set && !lcore_mapped) {
-			lcore_states[lcore].service_mask |= sid_mask;
+			cs->service_mask |= sid_mask;
 			rte_atomic_fetch_add_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 		if (!*set && lcore_mapped) {
-			lcore_states[lcore].service_mask &= ~(sid_mask);
+			cs->service_mask &= ~(sid_mask);
 			rte_atomic_fetch_sub_explicit(&rte_services[sid].num_mapped_cores,
 				1, rte_memory_order_relaxed);
 		}
 	}
 
 	if (enabled)
-		*enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+		*enabled = !!(cs->service_mask & (sid_mask));
 
 	return 0;
 }
@@ -685,13 +695,14 @@  set_lcore_state(uint32_t lcore, int32_t state)
 {
 	/* mark core state in hugepage backed config */
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	cfg->lcore_role[lcore] = state;
 
 	/* mark state in process local lcore_config */
 	lcore_config[lcore].core_role = state;
 
 	/* update per-lcore optimized state tracking */
-	lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
+	cs->is_service_core = (state == ROLE_SERVICE);
 
 	rte_eal_trace_service_lcore_state_change(lcore, state);
 }
@@ -702,14 +713,16 @@  rte_service_lcore_reset_all(void)
 	/* loop over cores, reset all to mask 0 */
 	uint32_t i;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (lcore_states[i].is_service_core) {
-			lcore_states[i].service_mask = 0;
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(i, lcore_states);
+		if (cs->is_service_core) {
+			cs->service_mask = 0;
 			set_lcore_state(i, ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
 			 * with load-acquire in runstate read functions.
 			 */
-			rte_atomic_store_explicit(&lcore_states[i].runstate,
+			rte_atomic_store_explicit(&cs->runstate,
 				RUNSTATE_STOPPED, rte_memory_order_release);
 		}
 	}
@@ -725,17 +738,19 @@  rte_service_lcore_add(uint32_t lcore)
 {
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
-	if (lcore_states[lcore].is_service_core)
+
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+	if (cs->is_service_core)
 		return -EALREADY;
 
 	set_lcore_state(lcore, ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
-	lcore_states[lcore].service_mask = 0;
+	cs->service_mask = 0;
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
-	rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+	rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED,
 		rte_memory_order_release);
 
 	return rte_eal_wait_lcore(lcore);
@@ -747,7 +762,7 @@  rte_service_lcore_del(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -EINVAL;
 
@@ -771,7 +786,7 @@  rte_service_lcore_start(uint32_t lcore)
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 	if (!cs->is_service_core)
 		return -EINVAL;
 
@@ -801,6 +816,8 @@  rte_service_lcore_start(uint32_t lcore)
 int32_t
 rte_service_lcore_stop(uint32_t lcore)
 {
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
@@ -808,12 +825,11 @@  rte_service_lcore_stop(uint32_t lcore)
 	 * memory order here to synchronize with store-release
 	 * in runstate update functions.
 	 */
-	if (rte_atomic_load_explicit(&lcore_states[lcore].runstate, rte_memory_order_acquire) ==
+	if (rte_atomic_load_explicit(&cs->runstate, rte_memory_order_acquire) ==
 			RUNSTATE_STOPPED)
 		return -EALREADY;
 
 	uint32_t i;
-	struct core_state *cs = &lcore_states[lcore];
 	uint64_t service_mask = cs->service_mask;
 
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
@@ -834,7 +850,7 @@  rte_service_lcore_stop(uint32_t lcore)
 	/* Use store-release memory order here to synchronize with
 	 * load-acquire in runstate read functions.
 	 */
-	rte_atomic_store_explicit(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+	rte_atomic_store_explicit(&cs->runstate, RUNSTATE_STOPPED,
 		rte_memory_order_release);
 
 	rte_eal_trace_service_lcore_stop(lcore);
@@ -845,7 +861,7 @@  rte_service_lcore_stop(uint32_t lcore)
 static uint64_t
 lcore_attr_get_loops(unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->loops, rte_memory_order_relaxed);
 }
@@ -853,7 +869,7 @@  lcore_attr_get_loops(unsigned int lcore)
 static uint64_t
 lcore_attr_get_cycles(unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->cycles, rte_memory_order_relaxed);
 }
@@ -861,7 +877,7 @@  lcore_attr_get_cycles(unsigned int lcore)
 static uint64_t
 lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->service_stats[service_id].calls,
 		rte_memory_order_relaxed);
@@ -870,7 +886,7 @@  lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore)
 static uint64_t
 lcore_attr_get_service_cycles(uint32_t service_id, unsigned int lcore)
 {
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	return rte_atomic_load_explicit(&cs->service_stats[service_id].cycles,
 		rte_memory_order_relaxed);
@@ -886,7 +902,10 @@  attr_get(uint32_t id, lcore_attr_get_fun lcore_attr_get)
 	uint64_t sum = 0;
 
 	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
-		if (lcore_states[lcore].is_service_core)
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
+
+		if (cs->is_service_core)
 			sum += lcore_attr_get(id, lcore);
 	}
 
@@ -930,12 +949,11 @@  int32_t
 rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 			   uint64_t *attr_value)
 {
-	struct core_state *cs;
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	if (lcore >= RTE_MAX_LCORE || !attr_value)
 		return -EINVAL;
 
-	cs = &lcore_states[lcore];
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -960,7 +978,8 @@  rte_service_attr_reset_all(uint32_t id)
 		return -EINVAL;
 
 	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
-		struct core_state *cs = &lcore_states[lcore];
+		struct core_state *cs =
+			RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 		cs->service_stats[id] = (struct service_stats) {};
 	}
@@ -971,12 +990,11 @@  rte_service_attr_reset_all(uint32_t id)
 int32_t
 rte_service_lcore_attr_reset_all(uint32_t lcore)
 {
-	struct core_state *cs;
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
-	cs = &lcore_states[lcore];
 	if (!cs->is_service_core)
 		return -ENOTSUP;
 
@@ -1011,7 +1029,7 @@  static void
 service_dump_calls_per_lcore(FILE *f, uint32_t lcore)
 {
 	uint32_t i;
-	struct core_state *cs = &lcore_states[lcore];
+	struct core_state *cs =	RTE_LCORE_VAR_LCORE_PTR(lcore, lcore_states);
 
 	fprintf(f, "%02d\t", lcore);
 	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {