[v5,1/4] eal: add lcore info in telemetry

Message ID 20221216102109.64142-2-rjarry@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series lcore telemetry improvements |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Robin Jarry Dec. 16, 2022, 10:21 a.m. UTC
  Report the same information as rte_lcore_dump() in the telemetry
API into /eal/lcore/list and /eal/lcore/info,ID.

Example:

  --> /eal/lcore/info,3
  {
    "/eal/lcore/info": {
      "lcore_id": 3,
      "socket": 0,
      "role": "RTE",
      "cpuset": [
        3
      ]
    }
  }

Signed-off-by: Robin Jarry <rjarry@redhat.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
v4 -> v5: No change

 lib/eal/common/eal_common_lcore.c | 96 +++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
  

Comments

Kevin Laatz Jan. 18, 2023, 9:42 a.m. UTC | #1
On 16/12/2022 10:21, Robin Jarry wrote:
> Report the same information than rte_lcore_dump() in the telemetry
> API into /eal/lcore/list and /eal/lcore/info,ID.
>
> Example:
>
>    --> /eal/lcore/info,3
>    {
>      "/eal/lcore/info": {
>        "lcore_id": 3,
>        "socket": 0,
>        "role": "RTE",
>        "cpuset": [
>          3
>        ]
>      }
>    }
>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v4 -> v5: No change
>
>   lib/eal/common/eal_common_lcore.c | 96 +++++++++++++++++++++++++++++++
>   1 file changed, 96 insertions(+)
>
> diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
> index 06c594b0224f..16548977dce8 100644
> --- a/lib/eal/common/eal_common_lcore.c
> +++ b/lib/eal/common/eal_common_lcore.c
> @@ -10,6 +10,9 @@
>   #include <rte_errno.h>
>   #include <rte_lcore.h>
>   #include <rte_log.h>
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +#include <rte_telemetry.h>
> +#endif
>   
>   #include "eal_private.h"
>   #include "eal_thread.h"
> @@ -456,3 +459,96 @@ rte_lcore_dump(FILE *f)
>   {
>   	rte_lcore_iterate(lcore_dump_cb, f);
>   }
> +
> +#ifndef RTE_EXEC_ENV_WINDOWS
> +static int
> +lcore_telemetry_id_cb(unsigned int lcore_id, void *arg)
> +{
> +	struct rte_tel_data *d = arg;
> +	return rte_tel_data_add_array_int(d, lcore_id);
> +}
> +
> +static int
> +handle_lcore_list(const char *cmd __rte_unused,
> +			const char *params __rte_unused,
> +			struct rte_tel_data *d)
> +{
> +	int ret = rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
> +	if (ret)
> +		return ret;
> +	return rte_lcore_iterate(lcore_telemetry_id_cb, d);
> +}
> +
> +struct lcore_telemetry_info {
> +	unsigned int lcore_id;
> +	struct rte_tel_data *d;
> +};
> +
> +static int
> +lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
> +{
> +	struct lcore_telemetry_info *info = arg;
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	struct rte_tel_data *cpuset;
> +	const char *role;
> +	unsigned int cpu;
> +
> +	if (info->lcore_id != lcore_id)
> +		return 0;
> +
> +	switch (cfg->lcore_role[lcore_id]) {
> +	case ROLE_RTE:
> +		role = "RTE";
> +		break;
> +	case ROLE_SERVICE:
> +		role = "SERVICE";
> +		break;
> +	case ROLE_NON_EAL:
> +		role = "NON_EAL";
> +		break;
> +	default:
> +		role = "UNKNOWN";
> +		break;
> +	}
> +	rte_tel_data_start_dict(info->d);
> +	rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id);
> +	rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id));
> +	rte_tel_data_add_dict_string(info->d, "role", role);
> +	cpuset = rte_tel_data_alloc();
> +	if (!cpuset)
> +		return -ENOMEM;
> +	rte_tel_data_start_array(cpuset, RTE_TEL_INT_VAL);
> +	for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
> +		if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
> +			rte_tel_data_add_array_int(cpuset, cpu);
> +	rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
> +
> +	return 0;
> +}
> +
> +static int
> +handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_tel_data *d)
> +{
> +	struct lcore_telemetry_info info = { .d = d };
> +	char *endptr = NULL;
> +	if (params == NULL || strlen(params) == 0)
> +		return -EINVAL;
> +	errno = 0;
> +	info.lcore_id = strtoul(params, &endptr, 10);
> +	if (errno)
> +		return -errno;
> +	if (endptr == params)
> +		return -EINVAL;
> +	return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
> +}
> +
> +RTE_INIT(lcore_telemetry)
> +{
> +	rte_telemetry_register_cmd(
> +			"/eal/lcore/list", handle_lcore_list,
> +			"List of lcore ids. Takes no parameters");
> +	rte_telemetry_register_cmd(
> +			"/eal/lcore/info", handle_lcore_info,
> +			"Returns lcore info. Parameters: int lcore_id");
> +}
> +#endif /* !RTE_EXEC_ENV_WINDOWS */

Hi Robin,

Thanks for taking the time to work on this. It is a good implementation 
for debug use-cases.

I have 2 suggestions which would improve the usability of the data:
1. Could we make the lcore_id parameter on /eal/lcore/info optional? 
This would allow users to read info for all lcores in the application at 
once.
2. Could we add 2 additional telemetry endpoints? One which returns an 
array of busy_cycles values and the other returns an array of 
total_cycles values. These arrays could be used in conjunction with the 
/eal/lcore/list endpoint to quickly read the usage related metrics. I've 
included an example diff below [1].

We have a use-case beyond debugging in which we read telemetry every few 
milliseconds. From a performance point of view, adding the 2 additional 
endpoints would be very beneficial.

Thanks,
Kevin

[1]

diff --git a/lib/eal/common/eal_common_lcore.c 
b/lib/eal/common/eal_common_lcore.c
index 210636d21d..94ddb276c5 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused, 
const char *params, struct rte_t
         return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
  }

+static int
+lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
+{
+       struct rte_tel_data *d = arg;
+       struct rte_lcore_usage usage;
+       rte_lcore_usage_cb usage_cb;
+       unsigned long cycles = 0;
+
+       memset(&usage, 0, sizeof(usage));
+       usage_cb = lcore_usage_cb;
+       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
+               cycles = usage.busy_cycles;
+
+       return rte_tel_data_add_array_u64(d, cycles);
+}
+
+static int
+handle_lcore_busy_cycles(const char *cmd __rte_unused,
+               const char *params __rte_unused, struct rte_tel_data *d)
+{
+       int ret = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
+       if (ret)
+               return ret;
+       return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
+}
+
  RTE_INIT(lcore_telemetry)
  {
         rte_telemetry_register_cmd(
@@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
         rte_telemetry_register_cmd(
                         "/eal/lcore/info", handle_lcore_info,
                         "Returns lcore info. Parameters: int lcore_id");
+       rte_telemetry_register_cmd(
+                       "/eal/lcore/busy_cycles", handle_lcore_busy_cycles,
+                       "List of busy cycle values. Takes no parameters");
  }
  #endif /* !RTE_EXEC_ENV_WINDOWS */
  
Morten Brørup Jan. 18, 2023, 10:21 a.m. UTC | #2
> From: Kevin Laatz [mailto:kevin.laatz@intel.com]
> Sent: Wednesday, 18 January 2023 10.42
> To: Robin Jarry; dev@dpdk.org
> Cc: Tyler Retzlaff; Morten Brørup
> Subject: Re: [PATCH v5 1/4] eal: add lcore info in telemetry
> 
> On 16/12/2022 10:21, Robin Jarry wrote:
> > Report the same information than rte_lcore_dump() in the telemetry
> > API into /eal/lcore/list and /eal/lcore/info,ID.
> >
> > Example:
> >
> >    --> /eal/lcore/info,3
> >    {
> >      "/eal/lcore/info": {
> >        "lcore_id": 3,
> >        "socket": 0,
> >        "role": "RTE",
> >        "cpuset": [
> >          3
> >        ]
> >      }
> >    }
> >
> > Signed-off-by: Robin Jarry <rjarry@redhat.com>
> > Acked-by: Morten Brørup <mb@smartsharesystems.com>
> > ---


> Hi Robin,
> 
> Thanks for taking the time to work on this. It is a good implementation
> for debug use-cases.
> 
> I have 2 suggestions which would improve the usability of the data:
> 1. Could we make the lcore_id paramater on /eal/lcore/info optional?
> This would allow users to read info for all lcores in the application
> at
> once.

+1 to this suggestion.

> 2. Could we add 2 additional telemetry endpoints? One which returns an
> array of busy_cycles values and the other returns an array of
> total_cycles values. These arrays could be used in conjunction with the
> /eal/lcore/list endpoint to quickly read the usage related metrics.
> I've
> included an example diff below [1].

I prefer this done in a more generic way, see below.

> 
> We have a use-case beyond debugging in which we read telemetry every
> few
> milliseconds. From a performance point of view, adding the 2 additional
> endpoints would be very beneficial.
> 
> Thanks,
> Kevin
> 
> [1]
> 
> diff --git a/lib/eal/common/eal_common_lcore.c
> b/lib/eal/common/eal_common_lcore.c
> index 210636d21d..94ddb276c5 100644
> --- a/lib/eal/common/eal_common_lcore.c
> +++ b/lib/eal/common/eal_common_lcore.c
> @@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused,
> const char *params, struct rte_t
>          return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
>   }
> 
> +static int
> +lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
> +{
> +       struct rte_tel_data *d = arg;
> +       struct rte_lcore_usage usage;
> +       rte_lcore_usage_cb usage_cb;
> +       unsigned long cycles = 0;
> +
> +       memset(&usage, 0, sizeof(usage));
> +       usage_cb = lcore_usage_cb;
> +       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
> +               cycles = usage.busy_cycles;
> +
> +       return rte_tel_data_add_array_u64(d, cycles);
> +}
> +
> +static int
> +handle_lcore_busy_cycles(const char *cmd __rte_unused,
> +               const char *params __rte_unused, struct rte_tel_data
> *d)
> +{
> +       int ret = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
> +       if (ret)
> +               return ret;
> +       return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
> +}
> +
>   RTE_INIT(lcore_telemetry)
>   {
>          rte_telemetry_register_cmd(
> @@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
>          rte_telemetry_register_cmd(
>                          "/eal/lcore/info", handle_lcore_info,
>                          "Returns lcore info. Parameters: int
> lcore_id");
> +       rte_telemetry_register_cmd(
> +                       "/eal/lcore/busy_cycles",
> handle_lcore_busy_cycles,
> +                       "List of busy cycle values. Takes no
> parameters");
>   }
>   #endif /* !RTE_EXEC_ENV_WINDOWS */

This should be generalized to support any named field in the rte_lcore_usage structure.

The general path could be: /eal/lcore/usage

With optional parameter lcore_id. This should return one object (or an array of such objects, if lcore_id is not given) with all usage fields and their values, e.g.:

{
"lcore_id": 7,
"total_cycles": 1234,
"usage_cycles": 567
}


The paths to support the array-optimized feature you are requesting could be: /eal/lcores/usage/total_cycles and /eal/lcores/usage/usage_cycles.

These paths should return the arrays as suggested. I only request that you change "/lcore" to plural "/lcores" and add "/usage" to the path before the field name in the usage table.

Alternatively, you could add a path /eal/lcores/usage_array, taking the field names as parameters and outputting multiple arrays like this:

/eal/lcores/usage_array,total_cycles,usage_cycles

{
"total_cycles": [1234, 1234, 1234],
"usage_cycles": [567, 678, 789]
}

But I don't know if this breaks with DPDK's standard REST interface. It would be easier if we had decided on something like OData, instead of inventing our own.
  
Kevin Laatz Jan. 18, 2023, 11:03 a.m. UTC | #3
On 18/01/2023 10:21, Morten Brørup wrote:
>> From: Kevin Laatz [mailto:kevin.laatz@intel.com]
>> Sent: Wednesday, 18 January 2023 10.42
>> To: Robin Jarry; dev@dpdk.org
>> Cc: Tyler Retzlaff; Morten Brørup
>> Subject: Re: [PATCH v5 1/4] eal: add lcore info in telemetry
>>
>> On 16/12/2022 10:21, Robin Jarry wrote:
>>> Report the same information than rte_lcore_dump() in the telemetry
>>> API into /eal/lcore/list and /eal/lcore/info,ID.
>>>
>>> Example:
>>>
>>>     --> /eal/lcore/info,3
>>>     {
>>>       "/eal/lcore/info": {
>>>         "lcore_id": 3,
>>>         "socket": 0,
>>>         "role": "RTE",
>>>         "cpuset": [
>>>           3
>>>         ]
>>>       }
>>>     }
>>>
>>> Signed-off-by: Robin Jarry <rjarry@redhat.com>
>>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
>>> ---
>
>> Hi Robin,
>>
>> Thanks for taking the time to work on this. It is a good implementation
>> for debug use-cases.
>>
>> I have 2 suggestions which would improve the usability of the data:
>> 1. Could we make the lcore_id paramater on /eal/lcore/info optional?
>> This would allow users to read info for all lcores in the application
>> at
>> once.
> +1 to this suggestion.
>
>> 2. Could we add 2 additional telemetry endpoints? One which returns an
>> array of busy_cycles values and the other returns an array of
>> total_cycles values. These arrays could be used in conjunction with the
>> /eal/lcore/list endpoint to quickly read the usage related metrics.
>> I've
>> included an example diff below [1].
> I prefer this done in a more generic way, see below.
>
>> We have a use-case beyond debugging in which we read telemetry every
>> few
>> milliseconds. From a performance point of view, adding the 2 additional
>> endpoints would be very beneficial.
>>
>> Thanks,
>> Kevin
>>
>> [1]
>>
>> diff --git a/lib/eal/common/eal_common_lcore.c
>> b/lib/eal/common/eal_common_lcore.c
>> index 210636d21d..94ddb276c5 100644
>> --- a/lib/eal/common/eal_common_lcore.c
>> +++ b/lib/eal/common/eal_common_lcore.c
>> @@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused,
>> const char *params, struct rte_t
>>           return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
>>    }
>>
>> +static int
>> +lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
>> +{
>> +       struct rte_tel_data *d = arg;
>> +       struct rte_lcore_usage usage;
>> +       rte_lcore_usage_cb usage_cb;
>> +       unsigned long cycles = 0;
>> +
>> +       memset(&usage, 0, sizeof(usage));
>> +       usage_cb = lcore_usage_cb;
>> +       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
>> +               cycles = usage.busy_cycles;
>> +
>> +       return rte_tel_data_add_array_u64(d, cycles);
>> +}
>> +
>> +static int
>> +handle_lcore_busy_cycles(const char *cmd __rte_unused,
>> +               const char *params __rte_unused, struct rte_tel_data
>> *d)
>> +{
>> +       int ret = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
>> +       if (ret)
>> +               return ret;
>> +       return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
>> +}
>> +
>>    RTE_INIT(lcore_telemetry)
>>    {
>>           rte_telemetry_register_cmd(
>> @@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
>>           rte_telemetry_register_cmd(
>>                           "/eal/lcore/info", handle_lcore_info,
>>                           "Returns lcore info. Parameters: int
>> lcore_id");
>> +       rte_telemetry_register_cmd(
>> +                       "/eal/lcore/busy_cycles",
>> handle_lcore_busy_cycles,
>> +                       "List of busy cycle values. Takes no
>> parameters");
>>    }
>>    #endif /* !RTE_EXEC_ENV_WINDOWS */
> This should be generalized to support any named field in the rte_lcore_usage structure.
>
> The general path could be: /eal/lcore/usage
>
> With optional parameter lcore_id. This should return one object (or an array of such objects, if lcore_id is not given) with all usage fields and their values, e.g.:
>
> {
> "lcore_id": 7,
> "total_cycles": 1234,
> "usage_cycles": 567
> }
>
>
> The paths to support the array-optimized feature you are requesting could be: /eal/lcores/usage/total_cycles and /eal/lcores/usage/usage_cycles.
>
> These paths should return the arrays as suggested. I only request that you change "/lcore" to plural "/lcores" and add "/usage" to the path before the field name in the usage table.
>
> Alternatively, you could add a path /eal/lcores/usage_array, taking the field names as parameters and outputting multiple arrays like this:
>
> /eal/lcores/usage_array,total_cycles,usage_cycles
>
> {
> "total_cycles": [1234, 1234, 1234],
> "usage_cycles": [567, 678, 789]
> }

+1, this would also work nicely and allows for extension in future 
without flooding with endpoints.


>
> But I don't know if this breaks with DPDK's standard REST interface. It would be easier if we had decided on something like OData, instead of inventing our own.
>
>
  
Morten Brørup Jan. 18, 2023, 11:35 a.m. UTC | #4
> From: Kevin Laatz [mailto:kevin.laatz@intel.com]
> Sent: Wednesday, 18 January 2023 12.03
> 
> On 18/01/2023 10:21, Morten Brørup wrote:
> >> From: Kevin Laatz [mailto:kevin.laatz@intel.com]
> >>
> >> On 16/12/2022 10:21, Robin Jarry wrote:
> >>> Report the same information than rte_lcore_dump() in the telemetry
> >>> API into /eal/lcore/list and /eal/lcore/info,ID.
> >>>
> >>> Example:
> >>>
> >>>     --> /eal/lcore/info,3
> >>>     {
> >>>       "/eal/lcore/info": {
> >>>         "lcore_id": 3,
> >>>         "socket": 0,
> >>>         "role": "RTE",
> >>>         "cpuset": [
> >>>           3
> >>>         ]
> >>>       }
> >>>     }
> >>>
> >>> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> >>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> >>> ---
> >
> >> Hi Robin,
> >>
> >> Thanks for taking the time to work on this. It is a good
> implementation
> >> for debug use-cases.
> >>
> >> I have 2 suggestions which would improve the usability of the data:
> >> 1. Could we make the lcore_id paramater on /eal/lcore/info optional?
> >> This would allow users to read info for all lcores in the
> application
> >> at
> >> once.
> > +1 to this suggestion.
> >
> >> 2. Could we add 2 additional telemetry endpoints? One which returns
> an
> >> array of busy_cycles values and the other returns an array of
> >> total_cycles values. These arrays could be used in conjunction with
> the
> >> /eal/lcore/list endpoint to quickly read the usage related metrics.
> >> I've
> >> included an example diff below [1].
> > I prefer this done in a more generic way, see below.
> >
> >> We have a use-case beyond debugging in which we read telemetry every
> >> few
> >> milliseconds. From a performance point of view, adding the 2
> additional
> >> endpoints would be very beneficial.
> >>
> >> Thanks,
> >> Kevin
> >>
> >> [1]
> >>
> >> diff --git a/lib/eal/common/eal_common_lcore.c
> >> b/lib/eal/common/eal_common_lcore.c
> >> index 210636d21d..94ddb276c5 100644
> >> --- a/lib/eal/common/eal_common_lcore.c
> >> +++ b/lib/eal/common/eal_common_lcore.c
> >> @@ -569,6 +569,32 @@ handle_lcore_info(const char *cmd __rte_unused,
> >> const char *params, struct rte_t
> >>           return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
> >>    }
> >>
> >> +static int
> >> +lcore_telemetry_busy_cycles_cb(unsigned int lcore_id, void *arg)
> >> +{
> >> +       struct rte_tel_data *d = arg;
> >> +       struct rte_lcore_usage usage;
> >> +       rte_lcore_usage_cb usage_cb;
> >> +       unsigned long cycles = 0;
> >> +
> >> +       memset(&usage, 0, sizeof(usage));
> >> +       usage_cb = lcore_usage_cb;
> >> +       if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0)
> >> +               cycles = usage.busy_cycles;
> >> +
> >> +       return rte_tel_data_add_array_u64(d, cycles);
> >> +}
> >> +
> >> +static int
> >> +handle_lcore_busy_cycles(const char *cmd __rte_unused,
> >> +               const char *params __rte_unused, struct rte_tel_data
> >> *d)
> >> +{
> >> +       int ret = rte_tel_data_start_array(d, RTE_TEL_U64_VAL);
> >> +       if (ret)
> >> +               return ret;
> >> +       return rte_lcore_iterate(lcore_telemetry_busy_cycles_cb, d);
> >> +}
> >> +
> >>    RTE_INIT(lcore_telemetry)
> >>    {
> >>           rte_telemetry_register_cmd(
> >> @@ -577,5 +603,8 @@ RTE_INIT(lcore_telemetry)
> >>           rte_telemetry_register_cmd(
> >>                           "/eal/lcore/info", handle_lcore_info,
> >>                           "Returns lcore info. Parameters: int
> >> lcore_id");
> >> +       rte_telemetry_register_cmd(
> >> +                       "/eal/lcore/busy_cycles",
> >> handle_lcore_busy_cycles,
> >> +                       "List of busy cycle values. Takes no
> >> parameters");
> >>    }
> >>    #endif /* !RTE_EXEC_ENV_WINDOWS */
> > This should be generalized to support any named field in the
> rte_lcore_usage structure.
> >
> > The general path could be: /eal/lcore/usage
> >
> > With optional parameter lcore_id. This should return one object (or
> an array of such objects, if lcore_id is not given) with all usage
> fields and their values, e.g.:
> >
> > {
> > "lcore_id": 7,
> > "total_cycles": 1234,
> > "usage_cycles": 567
> > }
> >
> >
> > The paths to support the array-optimized feature you are requesting
> could be: /eal/lcores/usage/total_cycles and
> /eal/lcores/usage/usage_cycles.
> >
> > These paths should return the arrays as suggested. I only request
> that you change "/lcore" to plural "/lcores" and add "/usage" to the
> path before the field name in the usage table.
> >
> > Alternatively, you could add a path /eal/lcores/usage_array, taking
> the field names as parameters and outputting multiple arrays like this:
> >
> > /eal/lcores/usage_array,total_cycles,usage_cycles
> >
> > {

I forgot the index array:

"lcore_id": [1,6,7],

> > "total_cycles": [1234, 1234, 1234],
> > "usage_cycles": [567, 678, 789]
> > }
> 
> +1, this would also work nicely and allows for extension in future
> without flooding with endpoints.

Our applications do something somewhat similar, i.e. use JSON arrays for performance (and bandwidth conservation) reasons - so I can confirm that the concept works in production.

But come to think of it, your array suggestion seems more like an additional feature, which belongs in another patch. We should not hold back Robin's patch series due to feature creep. :-)

> 
> 
> >
> > But I don't know if this breaks with DPDK's standard REST interface.
> It would be easier if we had decided on something like OData, instead
> of inventing our own.
> >
> >
  
Robin Jarry Jan. 18, 2023, 2:45 p.m. UTC | #5
Kevin Laatz, Jan 18, 2023 at 10:42:
> Hi Robin,
>
> Thanks for taking the time to work on this. It is a good implementation 
> for debug use-cases.
>
> I have 2 suggestions which would improve the usability of the data:
> 1. Could we make the lcore_id paramater on /eal/lcore/info optional? 
> This would allow users to read info for all lcores in the application at 
> once.

I don't think it would be a good thing since it would require returning
a different data format depending on whether the parameter is specified or not.

Probably adding another endpoint /eal/lcore/info_all that returns a list
of /eal/lcore/info (one for every lcore) would be better.

> 2. Could we add 2 additional telemetry endpoints? One which returns an 
> array of busy_cycles values and the other returns an array of 
> total_cycles values. These arrays could be used in conjunction with the 
> /eal/lcore/list endpoint to quickly read the usage related metrics. I've 
> included an example diff below [1].
>
> We have a use-case beyond debugging in which we read telemetry every few 
> milliseconds. From a performance point of view, adding the 2 additional 
> endpoints would be very beneficial.

If we add /eal/lcore/info_all you would have all this without two
additional endpoints. I don't think that calling it every few
milliseconds and extracting the {busy,total}_cycles values would be
a problem.

I can add another patch in the series but I would prefer not changing
the format at the last minute.

Would that be ok?
  
Kevin Laatz Jan. 18, 2023, 4:01 p.m. UTC | #6
On 18/01/2023 14:45, Robin Jarry wrote:
> Kevin Laatz, Jan 18, 2023 at 10:42:
>> Hi Robin,
>>
>> Thanks for taking the time to work on this. It is a good implementation
>> for debug use-cases.
>>
>> I have 2 suggestions which would improve the usability of the data:
>> 1. Could we make the lcore_id paramater on /eal/lcore/info optional?
>> This would allow users to read info for all lcores in the application at
>> once.
> I don't think it would be a good thing since it would require returning
> a different data format depending the parameter is specifier or not.
>
> Probably adding another endpoint /eal/lcore/info_all that returns a list
> of /eal/lcore/info (one for every lcore) would be better.

Either option seems ok, I don't have a strong preference, the main thing 
here is to get the info for all cores in our telemetry read.


>> 2. Could we add 2 additional telemetry endpoints? One which returns an
>> array of busy_cycles values and the other returns an array of
>> total_cycles values. These arrays could be used in conjunction with the
>> /eal/lcore/list endpoint to quickly read the usage related metrics. I've
>> included an example diff below [1].
>>
>> We have a use-case beyond debugging in which we read telemetry every few
>> milliseconds. From a performance point of view, adding the 2 additional
>> endpoints would be very beneficial.
> If we add /eal/lcore/info_all you would have all this without two
> additional endpoints. I don't think that calling it every few
> milliseconds and extracting the {busy,total}_cycles values would be
> a problem.
>
> I can add another patch in the series but I would prefer not changing
> the format at the last minute.

While all of the information would be available, there are performance 
benefits to reducing the size of data returned and to flattening the 
arrays, in addition to a reduction in the JSON parsing required to 
extract the needed metrics.

The additional endpoint(s) (I like Morten's idea of a single additional 
endpoint where you can specify the metrics to include via parameters) 
shouldn't affect the format of other parts of this patchset, but we 
would gain the benefits of the additional metric format.
  
Robin Jarry Jan. 18, 2023, 4:17 p.m. UTC | #7
Kevin Laatz, Jan 18, 2023 at 17:01:
> The additional endpoint(s) (I like Morten's idea of a single additional 
> endpoint where you can specify the metrics to include via parameters) 
> shouldn't affect the format of other parts of this patchset, but we 
> would gain the benefits of the additional metric format.

Understood. This could probably be added in another patch. If that is
not too much work I can look into it.
  

Patch

diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 06c594b0224f..16548977dce8 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -10,6 +10,9 @@ 
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_log.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_telemetry.h>
+#endif
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -456,3 +459,96 @@  rte_lcore_dump(FILE *f)
 {
 	rte_lcore_iterate(lcore_dump_cb, f);
 }
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+static int
+lcore_telemetry_id_cb(unsigned int lcore_id, void *arg)
+{
+	struct rte_tel_data *d = arg;
+	return rte_tel_data_add_array_int(d, lcore_id);
+}
+
+static int
+handle_lcore_list(const char *cmd __rte_unused,
+			const char *params __rte_unused,
+			struct rte_tel_data *d)
+{
+	int ret = rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
+	if (ret)
+		return ret;
+	return rte_lcore_iterate(lcore_telemetry_id_cb, d);
+}
+
+struct lcore_telemetry_info {
+	unsigned int lcore_id;
+	struct rte_tel_data *d;
+};
+
+static int
+lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
+{
+	struct lcore_telemetry_info *info = arg;
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct rte_tel_data *cpuset;
+	const char *role;
+	unsigned int cpu;
+
+	if (info->lcore_id != lcore_id)
+		return 0;
+
+	switch (cfg->lcore_role[lcore_id]) {
+	case ROLE_RTE:
+		role = "RTE";
+		break;
+	case ROLE_SERVICE:
+		role = "SERVICE";
+		break;
+	case ROLE_NON_EAL:
+		role = "NON_EAL";
+		break;
+	default:
+		role = "UNKNOWN";
+		break;
+	}
+	rte_tel_data_start_dict(info->d);
+	rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id);
+	rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id));
+	rte_tel_data_add_dict_string(info->d, "role", role);
+	cpuset = rte_tel_data_alloc();
+	if (!cpuset)
+		return -ENOMEM;
+	rte_tel_data_start_array(cpuset, RTE_TEL_INT_VAL);
+	for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
+		if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset))
+			rte_tel_data_add_array_int(cpuset, cpu);
+	rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0);
+
+	return 0;
+}
+
+static int
+handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_tel_data *d)
+{
+	struct lcore_telemetry_info info = { .d = d };
+	char *endptr = NULL;
+	if (params == NULL || strlen(params) == 0)
+		return -EINVAL;
+	errno = 0;
+	info.lcore_id = strtoul(params, &endptr, 10);
+	if (errno)
+		return -errno;
+	if (endptr == params)
+		return -EINVAL;
+	return rte_lcore_iterate(lcore_telemetry_info_cb, &info);
+}
+
+RTE_INIT(lcore_telemetry)
+{
+	rte_telemetry_register_cmd(
+			"/eal/lcore/list", handle_lcore_list,
+			"List of lcore ids. Takes no parameters");
+	rte_telemetry_register_cmd(
+			"/eal/lcore/info", handle_lcore_info,
+			"Returns lcore info. Parameters: int lcore_id");
+}
+#endif /* !RTE_EXEC_ENV_WINDOWS */