[v6,2/5] eal: allow applications to report their cpu usage

Message ID 20230119150656.418404-3-rjarry@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series lcore telemetry improvements |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Robin Jarry Jan. 19, 2023, 3:06 p.m. UTC
  Allow applications to register a callback that will be invoked in
rte_lcore_dump() and when requesting lcore info in the telemetry API.

The callback is expected to return the number of TSC cycles that have
passed since application start and the number of these cycles that were
spent doing busy work.

Signed-off-by: Robin Jarry <rjarry@redhat.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---

Notes:
    v5 -> v6: Added/rephrased some inline comments.

 lib/eal/common/eal_common_lcore.c | 45 ++++++++++++++++++++++++++++---
 lib/eal/include/rte_lcore.h       | 35 ++++++++++++++++++++++++
 lib/eal/version.map               |  1 +
 3 files changed, 78 insertions(+), 3 deletions(-)
  

Comments

Kevin Laatz Jan. 19, 2023, 7:42 p.m. UTC | #1
On 19/01/2023 15:06, Robin Jarry wrote:
> Allow applications to register a callback that will be invoked in
> rte_lcore_dump() and when requesting lcore info in the telemetry API.
>
> The callback is expected to return the number of TSC cycles that have
> passed since application start and the number of these cycles that were
> spent doing busy work.
>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>
> Notes:
>      v5 -> v6: Added/rephrased some inline comments.
>
>   lib/eal/common/eal_common_lcore.c | 45 ++++++++++++++++++++++++++++---
>   lib/eal/include/rte_lcore.h       | 35 ++++++++++++++++++++++++
>   lib/eal/version.map               |  1 +
>   3 files changed, 78 insertions(+), 3 deletions(-)

Reviewed-by: Kevin Laatz <kevin.laatz@intel.com>
  
David Marchand Jan. 26, 2023, 11:22 a.m. UTC | #2
On Thu, Jan 19, 2023 at 4:08 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> Allow applications to register a callback that will be invoked in
> rte_lcore_dump() and when requesting lcore info in the telemetry API.
>
> The callback is expected to return the number of TSC cycles that have
> passed since application start and the number of these cycles that were
> spent doing busy work.
>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>
> Notes:
>     v5 -> v6: Added/rephrased some inline comments.
>
>  lib/eal/common/eal_common_lcore.c | 45 ++++++++++++++++++++++++++++---
>  lib/eal/include/rte_lcore.h       | 35 ++++++++++++++++++++++++
>  lib/eal/version.map               |  1 +
>  3 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
> index 16548977dce8..80513cfe3725 100644
> --- a/lib/eal/common/eal_common_lcore.c
> +++ b/lib/eal/common/eal_common_lcore.c
> @@ -2,6 +2,7 @@
>   * Copyright(c) 2010-2014 Intel Corporation
>   */
>
> +#include <inttypes.h>
>  #include <stdlib.h>
>  #include <string.h>
>
> @@ -422,11 +423,21 @@ rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
>         return ret;
>  }
>
> +static rte_lcore_usage_cb lcore_usage_cb;
> +
> +void
> +rte_lcore_register_usage_cb(rte_lcore_usage_cb cb)
> +{
> +       lcore_usage_cb = cb;
> +}
> +
>  static int
>  lcore_dump_cb(unsigned int lcore_id, void *arg)
>  {
>         struct rte_config *cfg = rte_eal_get_configuration();
> -       char cpuset[RTE_CPU_AFFINITY_STR_LEN];
> +       char cpuset[RTE_CPU_AFFINITY_STR_LEN], usage_str[256];

This is a debug/non performance sensitive helper.
Please remove this "big enough for now" buffer and use a dynamic allocation.


> +       struct rte_lcore_usage usage;
> +       rte_lcore_usage_cb usage_cb;
>         const char *role;
>         FILE *f = arg;
>         int ret;
> @@ -446,11 +457,25 @@ lcore_dump_cb(unsigned int lcore_id, void *arg)
>                 break;
>         }
>
> +       /* The callback may not set all the fields in the structure, so clear it here. */
> +       memset(&usage, 0, sizeof(usage));
> +       usage_str[0] = '\0';
> +       /*
> +        * Guard against concurrent modification of lcore_usage_cb.
> +        * rte_lcore_register_usage_cb() should only be called once at application init
> +        * but nothing prevents and application to reset the callback to NULL.
> +        */
> +       usage_cb = lcore_usage_cb;
> +       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) {
> +               snprintf(usage_str, sizeof(usage_str), ", busy cycles %"PRIu64"/%"PRIu64,
> +                       usage.busy_cycles, usage.total_cycles);
> +       }
>         ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
>                 sizeof(cpuset));
> -       fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
> +       fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s%s\n", lcore_id,
>                 rte_lcore_to_socket_id(lcore_id), role, cpuset,
> -               ret == 0 ? "" : "...");
> +               ret == 0 ? "" : "...", usage_str);
> +
>         return 0;
>  }
>
> @@ -489,7 +514,9 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
>  {
>         struct lcore_telemetry_info *info = arg;
>         struct rte_config *cfg = rte_eal_get_configuration();
> +       struct rte_lcore_usage usage;
>         struct rte_tel_data *cpuset;
> +       rte_lcore_usage_cb usage_cb;
>         const char *role;
>         unsigned int cpu;

Reverse xmas tree please.

>
> @@ -522,6 +549,18 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
>                 if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
>                         rte_tel_data_add_array_int(cpuset, cpu);
>         rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
> +       /* The callback may not set all the fields in the structure, so clear it here. */
> +       memset(&usage, 0, sizeof(usage));
> +       /*
> +        * Guard against concurrent modification of lcore_usage_cb.
> +        * rte_lcore_register_usage_cb() should only be called once at application init
> +        * but nothing prevents and application to reset the callback to NULL.
> +        */
> +       usage_cb = lcore_usage_cb;
> +       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) {
> +               rte_tel_data_add_dict_u64(info->d, "total_cycles", usage.total_cycles);
> +               rte_tel_data_add_dict_u64(info->d, "busy_cycles", usage.busy_cycles);
> +       }
>
>         return 0;
>  }
> diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
> index 6938c3fd7b81..52468e7120dd 100644
> --- a/lib/eal/include/rte_lcore.h
> +++ b/lib/eal/include/rte_lcore.h
> @@ -328,6 +328,41 @@ typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
>  int
>  rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
>
> +/**
> + * CPU usage statistics.

Let's be consistent and use lcore.


> + */
> +struct rte_lcore_usage {
> +       uint64_t total_cycles;
> +       /**< The total amount of time since application start, in TSC cycles. */
> +       uint64_t busy_cycles;
> +       /**< The amount of busy time since application start, in TSC cycles. */

This is confusing to have the comments after.
Please put those comments before the associated fields (using /** ).


> +};
> +
> +/**
> + * Callback to allow applications to report CPU usage.

lcore*

> + *
> + * @param [in] lcore_id
> + *   The lcore to consider.
> + * @param [out] usage
> + *   Counters representing this lcore usage. This can never be NULL.
> + * @return
> + *   - 0 if fields in usage were updated successfully. The fields that the
> + *       application does not support must not be modified.
> + *   - a negative value if the information is not available or if any error occurred.
> + */
> +typedef int (*rte_lcore_usage_cb)(unsigned int lcore_id, struct rte_lcore_usage *usage);
> +
> +/**
> + * Register a callback from an application to be called in rte_lcore_dump() and
> + * the /eal/lcore/info telemetry endpoint handler. Applications are expected to
> + * report CPU usage statistics via this callback.

lcore*

> + *
> + * @param cb
> + *   The callback function.
> + */
> +__rte_experimental
> +void rte_lcore_register_usage_cb(rte_lcore_usage_cb cb);
> +
>  /**
>   * List all lcores.
>   *
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index 7ad12a7dc985..30fd216a12ea 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -440,6 +440,7 @@ EXPERIMENTAL {
>         rte_thread_detach;
>         rte_thread_equal;
>         rte_thread_join;
> +       rte_lcore_register_usage_cb;

Please start a new block for 23.03 symbols.
  

Patch

diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 16548977dce8..80513cfe3725 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -2,6 +2,7 @@ 
  * Copyright(c) 2010-2014 Intel Corporation
  */
 
+#include <inttypes.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -422,11 +423,21 @@  rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
 	return ret;
 }
 
+static rte_lcore_usage_cb lcore_usage_cb;
+
+void
+rte_lcore_register_usage_cb(rte_lcore_usage_cb cb)
+{
+	lcore_usage_cb = cb;
+}
+
 static int
 lcore_dump_cb(unsigned int lcore_id, void *arg)
 {
 	struct rte_config *cfg = rte_eal_get_configuration();
-	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN], usage_str[256];
+	struct rte_lcore_usage usage;
+	rte_lcore_usage_cb usage_cb;
 	const char *role;
 	FILE *f = arg;
 	int ret;
@@ -446,11 +457,25 @@  lcore_dump_cb(unsigned int lcore_id, void *arg)
 		break;
 	}
 
+	/* The callback may not set all the fields in the structure, so clear it here. */
+	memset(&usage, 0, sizeof(usage));
+	usage_str[0] = '\0';
+	/*
+	 * Guard against concurrent modification of lcore_usage_cb.
+	 * rte_lcore_register_usage_cb() should only be called once at application init
+	 * but nothing prevents and application to reset the callback to NULL.
+	 */
+	usage_cb = lcore_usage_cb;
+	if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) {
+		snprintf(usage_str, sizeof(usage_str), ", busy cycles %"PRIu64"/%"PRIu64,
+			usage.busy_cycles, usage.total_cycles);
+	}
 	ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset,
 		sizeof(cpuset));
-	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id,
+	fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s%s\n", lcore_id,
 		rte_lcore_to_socket_id(lcore_id), role, cpuset,
-		ret == 0 ? "" : "...");
+		ret == 0 ? "" : "...", usage_str);
+
 	return 0;
 }
 
@@ -489,7 +514,9 @@  lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
 {
 	struct lcore_telemetry_info *info = arg;
 	struct rte_config *cfg = rte_eal_get_configuration();
+	struct rte_lcore_usage usage;
 	struct rte_tel_data *cpuset;
+	rte_lcore_usage_cb usage_cb;
 	const char *role;
 	unsigned int cpu;
 
@@ -522,6 +549,18 @@  lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
 		if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
 			rte_tel_data_add_array_int(cpuset, cpu);
 	rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
+	/* The callback may not set all the fields in the structure, so clear it here. */
+	memset(&usage, 0, sizeof(usage));
+	/*
+	 * Guard against concurrent modification of lcore_usage_cb.
+	 * rte_lcore_register_usage_cb() should only be called once at application init
+	 * but nothing prevents and application to reset the callback to NULL.
+	 */
+	usage_cb = lcore_usage_cb;
+	if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) {
+		rte_tel_data_add_dict_u64(info->d, "total_cycles", usage.total_cycles);
+		rte_tel_data_add_dict_u64(info->d, "busy_cycles", usage.busy_cycles);
+	}
 
 	return 0;
 }
diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
index 6938c3fd7b81..52468e7120dd 100644
--- a/lib/eal/include/rte_lcore.h
+++ b/lib/eal/include/rte_lcore.h
@@ -328,6 +328,41 @@  typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
 int
 rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg);
 
+/**
+ * CPU usage statistics.
+ */
+struct rte_lcore_usage {
+	uint64_t total_cycles;
+	/**< The total amount of time since application start, in TSC cycles. */
+	uint64_t busy_cycles;
+	/**< The amount of busy time since application start, in TSC cycles. */
+};
+
+/**
+ * Callback to allow applications to report CPU usage.
+ *
+ * @param [in] lcore_id
+ *   The lcore to consider.
+ * @param [out] usage
+ *   Counters representing this lcore usage. This can never be NULL.
+ * @return
+ *   - 0 if fields in usage were updated successfully. The fields that the
+ *       application does not support must not be modified.
+ *   - a negative value if the information is not available or if any error occurred.
+ */
+typedef int (*rte_lcore_usage_cb)(unsigned int lcore_id, struct rte_lcore_usage *usage);
+
+/**
+ * Register a callback from an application to be called in rte_lcore_dump() and
+ * the /eal/lcore/info telemetry endpoint handler. Applications are expected to
+ * report CPU usage statistics via this callback.
+ *
+ * @param cb
+ *   The callback function.
+ */
+__rte_experimental
+void rte_lcore_register_usage_cb(rte_lcore_usage_cb cb);
+
 /**
  * List all lcores.
  *
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 7ad12a7dc985..30fd216a12ea 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -440,6 +440,7 @@  EXPERIMENTAL {
 	rte_thread_detach;
 	rte_thread_equal;
 	rte_thread_join;
+	rte_lcore_register_usage_cb;
 };
 
 INTERNAL {