service: extend service function call statistics

Message ID 20240809202539.590510-1-mattias.ronnblom@ericsson.com (mailing list archive)
State Superseded
Delegated to: Thomas Monjalon
Headers
Series service: extend service function call statistics |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build fail github build: failed
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-compile-amd64-testing warning Testing issues
ci/iol-compile-arm64-testing success Testing PASS

Commit Message

Mattias Rönnblom Aug. 9, 2024, 8:25 p.m. UTC
Add two new per-service counters.

RTE_SERVICE_ATTR_IDLE_CALL_COUNT tracks the number of service function
invocations where no work was performed.

RTE_SERVICE_ATTR_ERROR_CALL_COUNT tracks the number invocations
resulting in an error.

The semantics of RTE_SERVICE_ATTR_CALL_COUNT remains the same (i.e.,
counting all invocations, regardless of return value).

The new statistics may be useful for both debugging and profiling
(e.g., calculate the average per-call processing latency for non-idle
service calls).

Service core tests are extended to cover the new counters, and
coverage for RTE_SERVICE_ATTR_CALL_COUNT is improved.

The documentation for the CYCLES attributes are updated to reflect
their actual semantics.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

--

PATCH:
 * Update release notes.
---
 app/test/test_service_cores.c          | 70 +++++++++++++++++++-------
 doc/guides/rel_notes/release_24_11.rst | 11 ++++
 lib/eal/common/rte_service.c           | 70 +++++++++++++++++++++-----
 lib/eal/include/rte_service.h          | 24 +++++++--
 4 files changed, 142 insertions(+), 33 deletions(-)
  

Patch

diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c
index 010ab82893..7ae6bbb179 100644
--- a/app/test/test_service_cores.c
+++ b/app/test/test_service_cores.c
@@ -3,11 +3,12 @@ 
  */
 
 #include <rte_common.h>
+#include <rte_cycles.h>
 #include <rte_hexdump.h>
-#include <rte_mbuf.h>
 #include <rte_malloc.h>
+#include <rte_mbuf.h>
 #include <rte_memcpy.h>
-#include <rte_cycles.h>
+#include <rte_random.h>
 
 #include <rte_service.h>
 #include <rte_service_component.h>
@@ -16,8 +17,10 @@ 
 
 /* used as the service core ID */
 static uint32_t slcore_id;
-/* used as timestamp to detect if a service core is running */
-static uint64_t service_tick;
+/* track service call count */
+static uint64_t service_calls;
+static uint64_t service_idle_calls;
+static uint64_t service_error_calls;
 /* used as a flag to check if a function was run */
 static uint32_t service_remote_launch_flag;
 
@@ -46,9 +49,21 @@  testsuite_teardown(void)
 static int32_t dummy_cb(void *args)
 {
 	RTE_SET_USED(args);
-	service_tick++;
+
+	service_calls++;
+
+	switch (rte_rand_max(3)) {
+	case 0:
+		return 0;
+	case 1:
+		service_idle_calls++;
+		return -EAGAIN;
+	default:
+		service_error_calls++;
+		return -ENOENT;
+	}
+
 	rte_delay_ms(SERVICE_DELAY);
-	return 0;
 }
 
 static int32_t dummy_mt_unsafe_cb(void *args)
@@ -121,6 +136,10 @@  unregister_all(void)
 	rte_service_lcore_reset_all();
 	rte_eal_mp_wait_lcore();
 
+	service_calls = 0;
+	service_idle_calls = 0;
+	service_error_calls = 0;
+
 	return TEST_SUCCESS;
 }
 
@@ -295,12 +314,19 @@  service_attr_get(void)
 			"Valid attr_get() call didn't return success");
 	TEST_ASSERT_EQUAL(0, attr_value,
 			"attr_get() call didn't set correct cycles (zero)");
-	/* check correct call count */
+	/* check correct call counts */
 	const int attr_calls = RTE_SERVICE_ATTR_CALL_COUNT;
 	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_calls, &attr_value),
 			"Valid attr_get() call didn't return success");
-	TEST_ASSERT_EQUAL(0, attr_value,
-			"attr_get() call didn't get call count (zero)");
+	TEST_ASSERT_EQUAL(0, attr_value, "Call count was not zero");
+	const int attr_idle_calls = RTE_SERVICE_ATTR_IDLE_CALL_COUNT;
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_idle_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(0, attr_value, "Idle call count was not zero");
+	const int attr_error_calls = RTE_SERVICE_ATTR_ERROR_CALL_COUNT;
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_error_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(0, attr_value, "Error call count was not zero");
 
 	/* Call service to increment cycle count */
 	TEST_ASSERT_EQUAL(0, rte_service_lcore_add(slcore_id),
@@ -331,8 +357,13 @@  service_attr_get(void)
 
 	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_calls, &attr_value),
 			"Valid attr_get() call didn't return success");
-	TEST_ASSERT_EQUAL(1, (attr_value > 0),
-			"attr_get() call didn't get call count (zero)");
+	TEST_ASSERT_EQUAL(service_calls, attr_value, "Unexpected call count");
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_idle_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(service_idle_calls, attr_value, "Unexpected idle call count");
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_error_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(service_error_calls, attr_value, "Unexpected error call count");
 
 	TEST_ASSERT_EQUAL(0, rte_service_attr_reset_all(id),
 			"Valid attr_reset_all() return success");
@@ -341,11 +372,16 @@  service_attr_get(void)
 			"Valid attr_get() call didn't return success");
 	TEST_ASSERT_EQUAL(0, attr_value,
 			"attr_get() call didn't set correct cycles (zero)");
-	/* ensure call count > zero */
+	/* ensure call counts are zero */
 	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_calls, &attr_value),
 			"Valid attr_get() call didn't return success");
-	TEST_ASSERT_EQUAL(0, (attr_value > 0),
-			"attr_get() call didn't get call count (zero)");
+	TEST_ASSERT_EQUAL(0, attr_value, "Call count was not reset");
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_idle_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(0, attr_value, "Idle call count was not reset");
+	TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_error_calls, &attr_value),
+			"Valid attr_get() call didn't return success");
+	TEST_ASSERT_EQUAL(0, attr_value, "Error call count was not reset");
 
 	return unregister_all();
 }
@@ -533,10 +569,10 @@  service_lcore_en_dis_able(void)
 static int
 service_lcore_running_check(void)
 {
-	uint64_t tick = service_tick;
+	uint64_t calls = service_calls;
 	rte_delay_ms(SERVICE_DELAY * 100);
-	/* if (tick != service_tick) we know the lcore as polled the service */
-	return tick != service_tick;
+	bool service_polled = calls != service_calls;
+	return service_polled;
 }
 
 static int
diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..eb3a1070e4 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -55,6 +55,17 @@  New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Extended service cores statistics.**
+
+  Two new per-service counters are added to the service cores framework.
+
+  `RTE_SERVICE_ATTR_IDLE_CALL_COUNT` tracks the number of service function
+  invocations where no actual work was performed.
+
+  `RTE_SERVICE_ATTR_ERROR_CALL_COUNT` tracks the number invocations
+  resulting in an error.
+
+  The new statistics are useful for debugging and profiling.
 
 Removed Items
 -------------
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 56379930b6..ab671daa6d 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -57,6 +57,8 @@  struct __rte_cache_aligned rte_service_spec_impl {
 
 struct service_stats {
 	RTE_ATOMIC(uint64_t) calls;
+	RTE_ATOMIC(uint64_t) idle_calls;
+	RTE_ATOMIC(uint64_t) error_calls;
 	RTE_ATOMIC(uint64_t) cycles;
 };
 
@@ -369,6 +371,16 @@  rte_service_runstate_get(uint32_t id)
 
 }
 
+static void
+service_counter_add(uint64_t *counter, uint64_t value)
+{
+	/* The lcore service worker thread is the only writer, and
+	 * thus only a non-atomic load and an atomic store is needed,
+	 * and not the more expensive atomic add.
+	 */
+	rte_atomic_store_explicit(counter, *counter + value, rte_memory_order_relaxed);
+}
+
 static inline void
 service_runner_do_callback(struct rte_service_spec_impl *s,
 			   struct core_state *cs, uint32_t service_idx)
@@ -380,27 +392,23 @@  service_runner_do_callback(struct rte_service_spec_impl *s,
 		uint64_t start = rte_rdtsc();
 		int rc = s->spec.callback(userdata);
 
-		/* The lcore service worker thread is the only writer,
-		 * and thus only a non-atomic load and an atomic store
-		 * is needed, and not the more expensive atomic
-		 * add.
-		 */
 		struct service_stats *service_stats =
 			&cs->service_stats[service_idx];
 
+		service_counter_add(&service_stats->calls, 1);
+
+		if (rc == -EAGAIN)
+			service_counter_add(&service_stats->idle_calls, 1);
+		else if (rc != 0)
+			service_counter_add(&service_stats->error_calls, 1);
+
 		if (likely(rc != -EAGAIN)) {
 			uint64_t end = rte_rdtsc();
 			uint64_t cycles = end - start;
 
-			rte_atomic_store_explicit(&cs->cycles, cs->cycles + cycles,
-				rte_memory_order_relaxed);
-			rte_atomic_store_explicit(&service_stats->cycles,
-				service_stats->cycles + cycles,
-				rte_memory_order_relaxed);
+			service_counter_add(&cs->cycles, cycles);
+			service_counter_add(&service_stats->cycles, cycles);
 		}
-
-		rte_atomic_store_explicit(&service_stats->calls,
-			service_stats->calls + 1, rte_memory_order_relaxed);
 	} else {
 		s->spec.callback(userdata);
 	}
@@ -867,6 +875,24 @@  lcore_attr_get_service_calls(uint32_t service_id, unsigned int lcore)
 		rte_memory_order_relaxed);
 }
 
+static uint64_t
+lcore_attr_get_service_idle_calls(uint32_t service_id, unsigned int lcore)
+{
+	struct core_state *cs = &lcore_states[lcore];
+
+	return rte_atomic_load_explicit(&cs->service_stats[service_id].idle_calls,
+		rte_memory_order_relaxed);
+}
+
+static uint64_t
+lcore_attr_get_service_error_calls(uint32_t service_id, unsigned int lcore)
+{
+	struct core_state *cs = &lcore_states[lcore];
+
+	return rte_atomic_load_explicit(&cs->service_stats[service_id].error_calls,
+		rte_memory_order_relaxed);
+}
+
 static uint64_t
 lcore_attr_get_service_cycles(uint32_t service_id, unsigned int lcore)
 {
@@ -899,6 +925,18 @@  attr_get_service_calls(uint32_t service_id)
 	return attr_get(service_id, lcore_attr_get_service_calls);
 }
 
+static uint64_t
+attr_get_service_idle_calls(uint32_t service_id)
+{
+	return attr_get(service_id, lcore_attr_get_service_idle_calls);
+}
+
+static uint64_t
+attr_get_service_error_calls(uint32_t service_id)
+{
+	return attr_get(service_id, lcore_attr_get_service_error_calls);
+}
+
 static uint64_t
 attr_get_service_cycles(uint32_t service_id)
 {
@@ -918,6 +956,12 @@  rte_service_attr_get(uint32_t id, uint32_t attr_id, uint64_t *attr_value)
 	case RTE_SERVICE_ATTR_CALL_COUNT:
 		*attr_value = attr_get_service_calls(id);
 		return 0;
+	case RTE_SERVICE_ATTR_IDLE_CALL_COUNT:
+		*attr_value = attr_get_service_idle_calls(id);
+		return 0;
+	case RTE_SERVICE_ATTR_ERROR_CALL_COUNT:
+		*attr_value = attr_get_service_error_calls(id);
+		return 0;
 	case RTE_SERVICE_ATTR_CYCLES:
 		*attr_value = attr_get_service_cycles(id);
 		return 0;
diff --git a/lib/eal/include/rte_service.h b/lib/eal/include/rte_service.h
index e49a7a877e..89057df585 100644
--- a/lib/eal/include/rte_service.h
+++ b/lib/eal/include/rte_service.h
@@ -374,15 +374,32 @@  int32_t rte_service_lcore_count_services(uint32_t lcore);
 int32_t rte_service_dump(FILE *f, uint32_t id);
 
 /**
- * Returns the number of cycles that this service has consumed
+ * Returns the number of cycles that this service has consumed. Only
+ * cycles spent in non-idle calls (i.e., calls not returning -EAGAIN)
+ * count.
  */
 #define RTE_SERVICE_ATTR_CYCLES 0
 
 /**
- * Returns the count of invocations of this service function
+ * Returns the total number of invocations of this service function
+ * (regardless of return value).
  */
 #define RTE_SERVICE_ATTR_CALL_COUNT 1
 
+/**
+ * Returns the number of invocations of this service function where the
+ * service reported having not performed any useful work (i.e.,
+ * returned -EAGAIN).
+ */
+#define RTE_SERVICE_ATTR_IDLE_CALL_COUNT 2
+
+/**
+ * Returns the number of invocations of this service function where the
+ * service reported an error (i.e., the return value was neither 0 nor
+ * -EAGAIN).
+ */
+#define RTE_SERVICE_ATTR_ERROR_CALL_COUNT 3
+
 /**
  * Get an attribute from a service.
  *
@@ -408,7 +425,8 @@  int32_t rte_service_attr_reset_all(uint32_t id);
 
 /**
  * Returns the total number of cycles that the lcore has spent on
- * running services.
+ * running services. Only non-idle calls (i.e., calls not returning
+ * -EAGAIN) count toward this total.
  */
 #define RTE_SERVICE_LCORE_ATTR_CYCLES 1