[v2,1/5] telemetry: escape special char when tel string

Message ID 20220617094624.17578-2-fengchengwen@huawei.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series support telemetry dump |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

fengchengwen June 17, 2022, 9:46 a.m. UTC
  This patch supports escape special characters (including: \",\\,/,\b,
/f,/n,/r,/t) when telemetry string.
This patch is used to support telemetry xxx-dump commands which the
string may include special characters.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 93 insertions(+), 3 deletions(-)
  

Comments

Morten Brørup June 17, 2022, 11:16 a.m. UTC | #1
> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> Sent: Friday, 17 June 2022 11.46
> 
> This patch supports escape special characters (including: \",\\,/,\b,
> /f,/n,/r,/t) when telemetry string.
> This patch is used to support telemetry xxx-dump commands which the
> string may include special characters.
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> ---
>  lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 93 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> index c6fd03a5ab..0f762f633e 100644
> --- a/lib/telemetry/telemetry.c
> +++ b/lib/telemetry/telemetry.c
> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> char *out_buf, size_t buf_len)
>  	return used;
>  }
> 
> +static bool
> +json_is_special_char(char ch)
> +{
> +	static unsigned char is_spec[256] = { 0 };
> +	static bool init_once;
> +
> +	if (!init_once) {
> +		is_spec['\"'] = 1;
> +		is_spec['\\'] = 1;
> +		is_spec['/'] = 1;
> +		is_spec['\b'] = 1;
> +		is_spec['\f'] = 1;
> +		is_spec['\n'] = 1;
> +		is_spec['\r'] = 1;
> +		is_spec['\t'] = 1;
> +		init_once = true;
> +	}
> +
> +	return (bool)is_spec[(unsigned char)ch];
> +}

Here's a suggestion for simplifying the code:

Remove json_is_special_char(), and update json_escape_special_char() and json_format_string() as follows:

> +
> +static size_t
> +json_escape_special_char(char *buf, const char ch)

Consider making this function inline.

> +{
> +	size_t used = 0;
> +
> +	switch (ch) {
> +	case '\"':
> +		buf[used++] = '\\';
> +		buf[used++] = '\"';
> +		break;
> +	case '\\':
> +		buf[used++] = '\\';
> +		buf[used++] = '\\';
> +		break;
> +	case '/':
> +		buf[used++] = '\\';
> +		buf[used++] = '/';
> +		break;
> +	case '\b':
> +		buf[used++] = '\\';
> +		buf[used++] = 'b';
> +		break;
> +	case '\f':
> +		buf[used++] = '\\';
> +		buf[used++] = 'f';
> +		break;
> +	case '\n':
> +		buf[used++] = '\\';
> +		buf[used++] = 'n';
> +		break;
> +	case '\r':
> +		buf[used++] = '\\';
> +		buf[used++] = 'r';
> +		break;
> +	case '\t':
> +		buf[used++] = '\\';
> +		buf[used++] = 't';
> +		break;
> +	default:

Handle non-escaped characters in the default case here instead:

+		buf[used++] = ch;

> +		break;
> +	}
> +
> +	return used;
> +}
> +
> +static size_t
> +json_format_string(char *buf, size_t len, const char *str)
> +{
> +	size_t used = 0;
> +
> +	while (*str) {
> +		if (unlikely(len < used + 2)) {
> +			TMTY_LOG(WARNING, "Insufficient buffer when json
> format string\n");
> +			break;
> +		}
> +

-- replace:
> +		if (json_is_special_char(*str))
> +			used += json_escape_special_char(buf + used, *str);
> +		else
> +			buf[used++] = *str;
> +
> +		str++;
-- by:
+		used += json_escape_special_char(buf + used, *str++);
--

End of suggestion. Feel free to use it or not. :-)

> +	}
> +
> +	return used;
> +}
> +
>  static void
>  output_json(const char *cmd, const struct rte_tel_data *d, int s)
>  {
> @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct
> rte_tel_data *d, int s)
>  				MAX_CMD_LEN, cmd ? cmd : "none");
>  		break;
>  	case RTE_TEL_STRING:
> -		used = snprintf(out_buf, sizeof(out_buf),
> "{\"%.*s\":\"%.*s\"}",
> -				MAX_CMD_LEN, cmd,
> -				RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
> +		used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
> +				MAX_CMD_LEN, cmd);
> +		used += json_format_string(out_buf + used,
> +				sizeof(out_buf) - used - 3, d->data.str);
> +		used += snprintf(out_buf + used, sizeof(out_buf) - used,
> "\"}");

I looked for missing 0-termination in json_format_string(), but that is not required due to the immediately following snprintf().

>  		break;
>  	case RTE_TEL_DICT:
>  		prefix_used = snprintf(out_buf, sizeof(out_buf),
> "{\"%.*s\":",
> --
> 2.33.0
> 

If you want to make it generic, these four cases in output_json() also need to JSON encode the strings:

case RTE_TEL_DICT:
	case RTE_TEL_STRING_VAL: ...
	case RTE_TEL_CONTAINER: ... (strings only)

case RTE_TEL_ARRAY_STRING:
	if (d->type == RTE_TEL_ARRAY_STRING) ...
	else if (d->type == RTE_TEL_ARRAY_CONTAINER) ... (strings only)


However, JSON encoding of strings inside arrays and containers is not required for the dump purposes addressed by this patch series, so I consider this patch complete without it. No need to add "feature creep" to this series.


Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
  
Bruce Richardson June 17, 2022, 11:25 a.m. UTC | #2
On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > Sent: Friday, 17 June 2022 11.46
> > 
> > This patch supports escape special characters (including: \",\\,/,\b,
> > /f,/n,/r,/t) when telemetry string.
> > This patch is used to support telemetry xxx-dump commands which the
> > string may include special characters.
> > 
> > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > ---
> >  lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 93 insertions(+), 3 deletions(-)
> > 
> > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> > index c6fd03a5ab..0f762f633e 100644
> > --- a/lib/telemetry/telemetry.c
> > +++ b/lib/telemetry/telemetry.c
> > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> > char *out_buf, size_t buf_len)
> >  	return used;
> >  }
> > 
> > +static bool
> > +json_is_special_char(char ch)
> > +{
> > +	static unsigned char is_spec[256] = { 0 };
> > +	static bool init_once;
> > +
> > +	if (!init_once) {
> > +		is_spec['\"'] = 1;
> > +		is_spec['\\'] = 1;
> > +		is_spec['/'] = 1;
> > +		is_spec['\b'] = 1;
> > +		is_spec['\f'] = 1;
> > +		is_spec['\n'] = 1;
> > +		is_spec['\r'] = 1;
> > +		is_spec['\t'] = 1;
> > +		init_once = true;
> > +	}
> > +
> > +	return (bool)is_spec[(unsigned char)ch];
> > +}

According to the json spec at [1], the characters that need to be escaped
are:
a) any characters <0x20
b) inverted commas/quote character \"
c) the "reverse solidus character", better known to you and I as the
back-slash.

Therefore, I think this table generation could be simplified, but also
expanded using this. For completeness we should also see about handling all
control characters if they are encountered.

[1] https://www.rfc-editor.org/rfc/rfc8259.txt

/Bruce
  
Bruce Richardson June 17, 2022, 11:27 a.m. UTC | #3
On Fri, Jun 17, 2022 at 05:46:20PM +0800, Chengwen Feng wrote:
> This patch supports escape special characters (including: \",\\,/,\b,
> /f,/n,/r,/t) when telemetry string.
> This patch is used to support telemetry xxx-dump commands which the
> string may include special characters.
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> ---
>  lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 93 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> index c6fd03a5ab..0f762f633e 100644
> --- a/lib/telemetry/telemetry.c
> +++ b/lib/telemetry/telemetry.c
> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len)
>  	return used;
>  }
>  
> +static bool
> +json_is_special_char(char ch)
> +{
> +	static unsigned char is_spec[256] = { 0 };
> +	static bool init_once;
> +
> +	if (!init_once) {
> +		is_spec['\"'] = 1;
> +		is_spec['\\'] = 1;
> +		is_spec['/'] = 1;
> +		is_spec['\b'] = 1;
> +		is_spec['\f'] = 1;
> +		is_spec['\n'] = 1;
> +		is_spec['\r'] = 1;
> +		is_spec['\t'] = 1;
> +		init_once = true;
> +	}
> +
> +	return (bool)is_spec[(unsigned char)ch];
> +}
> +
> +static size_t
> +json_escape_special_char(char *buf, const char ch)
> +{
> +	size_t used = 0;
> +
> +	switch (ch) {
> +	case '\"':
> +		buf[used++] = '\\';
> +		buf[used++] = '\"';
> +		break;
> +	case '\\':
> +		buf[used++] = '\\';
> +		buf[used++] = '\\';
> +		break;
> +	case '/':
> +		buf[used++] = '\\';
> +		buf[used++] = '/';
> +		break;
> +	case '\b':
> +		buf[used++] = '\\';
> +		buf[used++] = 'b';
> +		break;
> +	case '\f':
> +		buf[used++] = '\\';
> +		buf[used++] = 'f';
> +		break;
> +	case '\n':
> +		buf[used++] = '\\';
> +		buf[used++] = 'n';
> +		break;
> +	case '\r':
> +		buf[used++] = '\\';
> +		buf[used++] = 'r';
> +		break;
> +	case '\t':
> +		buf[used++] = '\\';
> +		buf[used++] = 't';
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return used;
> +}
> +
> +static size_t
> +json_format_string(char *buf, size_t len, const char *str)
> +{
> +	size_t used = 0;
> +
> +	while (*str) {
> +		if (unlikely(len < used + 2)) {
> +			TMTY_LOG(WARNING, "Insufficient buffer when json format string\n");
> +			break;
> +		}
> +
> +		if (json_is_special_char(*str))
> +			used += json_escape_special_char(buf + used, *str);
> +		else
> +			buf[used++] = *str;
> +
> +		str++;
> +	}
> +
> +	return used;
> +}
> +
>  static void
>  output_json(const char *cmd, const struct rte_tel_data *d, int s)
>  {
> @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct rte_tel_data *d, int s)
>  				MAX_CMD_LEN, cmd ? cmd : "none");
>  		break;
>  	case RTE_TEL_STRING:
> -		used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}",
> -				MAX_CMD_LEN, cmd,
> -				RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
> +		used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
> +				MAX_CMD_LEN, cmd);
> +		used += json_format_string(out_buf + used,
> +				sizeof(out_buf) - used - 3, d->data.str);
> +		used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}");
>  		break;
>  	case RTE_TEL_DICT:
>  		prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":",
> -- 

I think it might be worthwhile to write a general json_str_printf function
to do the snprintf and the escaping in one go. Then that might be more able
to be used in other places where we output strings.

/Bruce
  
Stephen Hemminger June 17, 2022, 5:05 p.m. UTC | #4
On Fri, 17 Jun 2022 12:25:04 +0100
Bruce Richardson <bruce.richardson@intel.com> wrote:

> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > Sent: Friday, 17 June 2022 11.46
> > > 
> > > This patch supports escape special characters (including: \",\\,/,\b,
> > > /f,/n,/r,/t) when telemetry string.
> > > This patch is used to support telemetry xxx-dump commands which the
> > > string may include special characters.
> > > 
> > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > ---
> > >  lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 93 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> > > index c6fd03a5ab..0f762f633e 100644
> > > --- a/lib/telemetry/telemetry.c
> > > +++ b/lib/telemetry/telemetry.c
> > > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> > > char *out_buf, size_t buf_len)
> > >  	return used;
> > >  }
> > > 
> > > +static bool
> > > +json_is_special_char(char ch)
> > > +{
> > > +	static unsigned char is_spec[256] = { 0 };
> > > +	static bool init_once;
> > > +
> > > +	if (!init_once) {
> > > +		is_spec['\"'] = 1;
> > > +		is_spec['\\'] = 1;
> > > +		is_spec['/'] = 1;
> > > +		is_spec['\b'] = 1;
> > > +		is_spec['\f'] = 1;
> > > +		is_spec['\n'] = 1;
> > > +		is_spec['\r'] = 1;
> > > +		is_spec['\t'] = 1;
> > > +		init_once = true;
> > > +	}
> > > +
> > > +	return (bool)is_spec[(unsigned char)ch];
> > > +}  
> 
> According to the json spec at [1], the characters that need to be escaped
> are:
> a) any characters <0x20
> b) inverted commas/quote character \"
> c) the "reverse solidus character", better known to you and I as the
> back-slash.
> 
> Therefore, I think this table generation could be simplified, but also
> expanded using this. For completeness we should also see about handling all
> control characters if they are encountered.
> 
> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> 
> /Bruce

Since it is trivial could be initializer?

static const uint8_t is_spec[256] = {
   [0 ... 0x20] = 1,
   ['\"' ] = 1,
   ['\\' ] = 1,
   ['/'] = 1,

etc

Or we could change the telemetry API to disallow control characters?
  
fengchengwen June 18, 2022, 3:52 a.m. UTC | #5
On 2022/6/18 1:05, Stephen Hemminger wrote:
> On Fri, 17 Jun 2022 12:25:04 +0100
> Bruce Richardson <bruce.richardson@intel.com> wrote:
> 
>> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
>>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
>>>> Sent: Friday, 17 June 2022 11.46
>>>>
>>>> This patch supports escape special characters (including: \",\\,/,\b,
>>>> /f,/n,/r,/t) when telemetry string.
>>>> This patch is used to support telemetry xxx-dump commands which the
>>>> string may include special characters.
>>>>
>>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>>>> ---
>>>>  lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
>>>>  1 file changed, 93 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
>>>> index c6fd03a5ab..0f762f633e 100644
>>>> --- a/lib/telemetry/telemetry.c
>>>> +++ b/lib/telemetry/telemetry.c
>>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
>>>> char *out_buf, size_t buf_len)
>>>>  	return used;
>>>>  }
>>>>
>>>> +static bool
>>>> +json_is_special_char(char ch)
>>>> +{
>>>> +	static unsigned char is_spec[256] = { 0 };
>>>> +	static bool init_once;
>>>> +
>>>> +	if (!init_once) {
>>>> +		is_spec['\"'] = 1;
>>>> +		is_spec['\\'] = 1;
>>>> +		is_spec['/'] = 1;
>>>> +		is_spec['\b'] = 1;
>>>> +		is_spec['\f'] = 1;
>>>> +		is_spec['\n'] = 1;
>>>> +		is_spec['\r'] = 1;
>>>> +		is_spec['\t'] = 1;
>>>> +		init_once = true;
>>>> +	}
>>>> +
>>>> +	return (bool)is_spec[(unsigned char)ch];
>>>> +}  
>>
>> According to the json spec at [1], the characters that need to be escaped
>> are:
>> a) any characters <0x20
>> b) inverted commas/quote character \"
>> c) the "reverse solidus character", better known to you and I as the
>> back-slash.
>>
>> Therefore, I think this table generation could be simplified, but also
>> expanded using this. For completeness we should also see about handling all
>> control characters if they are encountered.
>>
>> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
>>
>> /Bruce
> 
> Since it is trivial could be initializer?
> 
> static const uint8_t is_spec[256] = {
>    [0 ... 0x20] = 1,
>    ['\"' ] = 1,
>    ['\\' ] = 1,
>    ['/'] = 1,
> 
> etc
> 
> Or we could change the telemetry API to disallow control characters?

I was thinking about converting 0~0x20, but I don't think there's a scenario.

I prefer change the telemetry API to disallow control characters, and this may not
be a violation of the ABI, if yes, the dpdk-telemetry.py will returns an error.

So I think we could add declaring and checking functions to make sure telemetry string
do not allow control characters.

> 
> 
> .
>
  
Morten Brørup June 18, 2022, 9:59 a.m. UTC | #6
+CC: Ciara Power, Telemetry library maintainer

> From: fengchengwen [mailto:fengchengwen@huawei.com]
> Sent: Saturday, 18 June 2022 05.52
> 
> On 2022/6/18 1:05, Stephen Hemminger wrote:
> > On Fri, 17 Jun 2022 12:25:04 +0100
> > Bruce Richardson <bruce.richardson@intel.com> wrote:
> >
> >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> >>>> Sent: Friday, 17 June 2022 11.46
> >>>>
> >>>> This patch supports escape special characters (including:
> \",\\,/,\b,
> >>>> /f,/n,/r,/t) when telemetry string.
> >>>> This patch is used to support telemetry xxx-dump commands which
> the
> >>>> string may include special characters.
> >>>>
> >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> >>>> ---
> >>>>  lib/telemetry/telemetry.c | 96
> +++++++++++++++++++++++++++++++++++++--
> >>>>  1 file changed, 93 insertions(+), 3 deletions(-)
> >>>>
> >>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> >>>> index c6fd03a5ab..0f762f633e 100644
> >>>> --- a/lib/telemetry/telemetry.c
> >>>> +++ b/lib/telemetry/telemetry.c
> >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> *d,
> >>>> char *out_buf, size_t buf_len)
> >>>>  	return used;
> >>>>  }
> >>>>
> >>>> +static bool
> >>>> +json_is_special_char(char ch)
> >>>> +{
> >>>> +	static unsigned char is_spec[256] = { 0 };
> >>>> +	static bool init_once;
> >>>> +
> >>>> +	if (!init_once) {
> >>>> +		is_spec['\"'] = 1;
> >>>> +		is_spec['\\'] = 1;
> >>>> +		is_spec['/'] = 1;
> >>>> +		is_spec['\b'] = 1;
> >>>> +		is_spec['\f'] = 1;
> >>>> +		is_spec['\n'] = 1;
> >>>> +		is_spec['\r'] = 1;
> >>>> +		is_spec['\t'] = 1;
> >>>> +		init_once = true;
> >>>> +	}
> >>>> +
> >>>> +	return (bool)is_spec[(unsigned char)ch];
> >>>> +}
> >>
> >> According to the json spec at [1], the characters that need to be
> escaped
> >> are:
> >> a) any characters <0x20
> >> b) inverted commas/quote character \"
> >> c) the "reverse solidus character", better known to you and I as the
> >> back-slash.
> >>
> >> Therefore, I think this table generation could be simplified, but
> also
> >> expanded using this. For completeness we should also see about
> handling all
> >> control characters if they are encountered.
> >>
> >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> >>
> >> /Bruce
> >
> > Since it is trivial could be initializer?
> >
> > static const uint8_t is_spec[256] = {
> >    [0 ... 0x20] = 1,
> >    ['\"' ] = 1,
> >    ['\\' ] = 1,
> >    ['/'] = 1,
> >
> > etc
> >
> > Or we could change the telemetry API to disallow control characters?
> 
> I was thinking about converting 0~0x20, but I don't think there's a
> scenario.
> 
> I prefer change the telemetry API to disallow control characters, and
> this may not
> be a violation of the ABI, if yes, the dpdk-telemetry.py will returns
> an error.

I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.

So we need to define exactly what the STRING type contains.

I hope we can all agree that control characters should be disallowed.

The more complicated question is: Do we want to use the ASCII character set only, or do we want to use UTF-8 encoded Unicode?

Personally, think UTF-8 encoded Unicode is more future proof, and would vote for that.

But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce another data type for UTF-8 strings.

UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many SNMP MIBs.

> 
> So I think we could add declaring and checking functions to make sure
> telemetry string
> do not allow control characters.

Input validation (when storing a string in the telemetry database) has a performance cost, so it could be a compile time debug option, like the memory cookies and mbuf integrity checks. Just a thought.
  
Power, Ciara June 22, 2022, 7:57 a.m. UTC | #7
Hi folks,

> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Saturday 18 June 2022 10:59
> To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> <ciara.power@intel.com>
> Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
> 
> +CC: Ciara Power, Telemetry library maintainer
> 
> > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > Sent: Saturday, 18 June 2022 05.52
> >
> > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > >
> > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > >>>> Sent: Friday, 17 June 2022 11.46
> > >>>>
> > >>>> This patch supports escape special characters (including:
> > \",\\,/,\b,
> > >>>> /f,/n,/r,/t) when telemetry string.
> > >>>> This patch is used to support telemetry xxx-dump commands which
> > the
> > >>>> string may include special characters.
> > >>>>
> > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > >>>> ---
> > >>>>  lib/telemetry/telemetry.c | 96
> > +++++++++++++++++++++++++++++++++++++--
> > >>>>  1 file changed, 93 insertions(+), 3 deletions(-)
> > >>>>
> > >>>> diff --git a/lib/telemetry/telemetry.c
> > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > >>>> --- a/lib/telemetry/telemetry.c
> > >>>> +++ b/lib/telemetry/telemetry.c
> > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > *d,
> > >>>> char *out_buf, size_t buf_len)
> > >>>>  	return used;
> > >>>>  }
> > >>>>
> > >>>> +static bool
> > >>>> +json_is_special_char(char ch)
> > >>>> +{
> > >>>> +	static unsigned char is_spec[256] = { 0 };
> > >>>> +	static bool init_once;
> > >>>> +
> > >>>> +	if (!init_once) {
> > >>>> +		is_spec['\"'] = 1;
> > >>>> +		is_spec['\\'] = 1;
> > >>>> +		is_spec['/'] = 1;
> > >>>> +		is_spec['\b'] = 1;
> > >>>> +		is_spec['\f'] = 1;
> > >>>> +		is_spec['\n'] = 1;
> > >>>> +		is_spec['\r'] = 1;
> > >>>> +		is_spec['\t'] = 1;
> > >>>> +		init_once = true;
> > >>>> +	}
> > >>>> +
> > >>>> +	return (bool)is_spec[(unsigned char)ch]; }
> > >>
> > >> According to the json spec at [1], the characters that need to be
> > escaped
> > >> are:
> > >> a) any characters <0x20
> > >> b) inverted commas/quote character \"
> > >> c) the "reverse solidus character", better known to you and I as
> > >> the back-slash.
> > >>
> > >> Therefore, I think this table generation could be simplified, but
> > also
> > >> expanded using this. For completeness we should also see about
> > handling all
> > >> control characters if they are encountered.
> > >>
> > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > >>
> > >> /Bruce
> > >
> > > Since it is trivial could be initializer?
> > >
> > > static const uint8_t is_spec[256] = {
> > >    [0 ... 0x20] = 1,
> > >    ['\"' ] = 1,
> > >    ['\\' ] = 1,
> > >    ['/'] = 1,
> > >
> > > etc
> > >
> > > Or we could change the telemetry API to disallow control characters?
> >
> > I was thinking about converting 0~0x20, but I don't think there's a
> > scenario.
> >
> > I prefer change the telemetry API to disallow control characters, and
> > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > will returns an error.
> 
> I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
> 
> So we need to define exactly what the STRING type contains.
> 
> I hope we can all agree that control characters should be disallowed.
> 
> The more complicated question is: Do we want to use the ASCII character set
> only, or do we want to use UTF-8 encoded Unicode?
> 
> Personally, think UTF-8 encoded Unicode is more future proof, and would
> vote for that.
> 
> But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> another data type for UTF-8 strings.
> 
> UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> SNMP MIBs.
> 
[CP] 

Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).

[1] https://www.rfc-editor.org/rfc/rfc8259.txt

> >
> > So I think we could add declaring and checking functions to make sure
> > telemetry string do not allow control characters.
[CP] 

I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.

<snip>

In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.

Thanks,
Ciara
  
Bruce Richardson June 22, 2022, 9:19 a.m. UTC | #8
On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote:
> Hi folks,
> 
> > -----Original Message-----
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Saturday 18 June 2022 10:59
> > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Richardson, Bruce
> > <bruce.richardson@intel.com>
> > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> > jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> > <ciara.power@intel.com>
> > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
> >
> > +CC: Ciara Power, Telemetry library maintainer
> >
> > > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > > Sent: Saturday, 18 June 2022 05.52
> > >
> > > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > > >
> > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > >>>> Sent: Friday, 17 June 2022 11.46
> > > >>>>
> > > >>>> This patch supports escape special characters (including:
> > > \",\\,/,\b,
> > > >>>> /f,/n,/r,/t) when telemetry string.
> > > >>>> This patch is used to support telemetry xxx-dump commands which
> > > the
> > > >>>> string may include special characters.
> > > >>>>
> > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > >>>> ---
> > > >>>>  lib/telemetry/telemetry.c | 96
> > > +++++++++++++++++++++++++++++++++++++--
> > > >>>>  1 file changed, 93 insertions(+), 3 deletions(-)
> > > >>>>
> > > >>>> diff --git a/lib/telemetry/telemetry.c
> > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > > >>>> --- a/lib/telemetry/telemetry.c
> > > >>>> +++ b/lib/telemetry/telemetry.c
> > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > > *d,
> > > >>>> char *out_buf, size_t buf_len)
> > > >>>>        return used;
> > > >>>>  }
> > > >>>>
> > > >>>> +static bool
> > > >>>> +json_is_special_char(char ch)
> > > >>>> +{
> > > >>>> +      static unsigned char is_spec[256] = { 0 };
> > > >>>> +      static bool init_once;
> > > >>>> +
> > > >>>> +      if (!init_once) {
> > > >>>> +              is_spec['\"'] = 1;
> > > >>>> +              is_spec['\\'] = 1;
> > > >>>> +              is_spec['/'] = 1;
> > > >>>> +              is_spec['\b'] = 1;
> > > >>>> +              is_spec['\f'] = 1;
> > > >>>> +              is_spec['\n'] = 1;
> > > >>>> +              is_spec['\r'] = 1;
> > > >>>> +              is_spec['\t'] = 1;
> > > >>>> +              init_once = true;
> > > >>>> +      }
> > > >>>> +
> > > >>>> +      return (bool)is_spec[(unsigned char)ch]; }
> > > >>
> > > >> According to the json spec at [1], the characters that need to be
> > > escaped
> > > >> are:
> > > >> a) any characters <0x20
> > > >> b) inverted commas/quote character \"
> > > >> c) the "reverse solidus character", better known to you and I as
> > > >> the back-slash.
> > > >>
> > > >> Therefore, I think this table generation could be simplified, but
> > > also
> > > >> expanded using this. For completeness we should also see about
> > > handling all
> > > >> control characters if they are encountered.
> > > >>
> > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > > >>
> > > >> /Bruce
> > > >
> > > > Since it is trivial could be initializer?
> > > >
> > > > static const uint8_t is_spec[256] = {
> > > >    [0 ... 0x20] = 1,
> > > >    ['\"' ] = 1,
> > > >    ['\\' ] = 1,
> > > >    ['/'] = 1,
> > > >
> > > > etc
> > > >
> > > > Or we could change the telemetry API to disallow control characters?
> > >
> > > I was thinking about converting 0~0x20, but I don't think there's a
> > > scenario.
> > >
> > > I prefer change the telemetry API to disallow control characters, and
> > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > > will returns an error.
> >
> > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
> >
> > So we need to define exactly what the STRING type contains.
> >
> > I hope we can all agree that control characters should be disallowed.
> >
> > The more complicated question is: Do we want to use the ASCII character set
> > only, or do we want to use UTF-8 encoded Unicode?
> >
> > Personally, think UTF-8 encoded Unicode is more future proof, and would
> > vote for that.
> >
> > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> > another data type for UTF-8 strings.
> >
> > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> > SNMP MIBs.
> >
> [CP]
> 
> Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).
> 
> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> 
> > >
> > > So I think we could add declaring and checking functions to make sure
> > > telemetry string do not allow control characters.
> [CP]
> 
> I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.
> 
> <snip>
> 
> In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.
>

I'm hoping to have some time to try and prototype this myself soon, and
send out a draft patch to this mailing list for consideration.

/Bruce
  
Bruce Richardson June 23, 2022, 4:45 p.m. UTC | #9
On Wed, Jun 22, 2022 at 10:19:48AM +0100, Bruce Richardson wrote:
> On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote:
> > Hi folks,
> > 
> > > -----Original Message-----
> > > From: Morten Brørup <mb@smartsharesystems.com>
> > > Sent: Saturday 18 June 2022 10:59
> > > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> > > <stephen@networkplumber.org>; Richardson, Bruce
> > > <bruce.richardson@intel.com>
> > > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> > > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> > > jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> > > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> > > <ciara.power@intel.com>
> > > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
> > >
> > > +CC: Ciara Power, Telemetry library maintainer
> > >
> > > > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > > > Sent: Saturday, 18 June 2022 05.52
> > > >
> > > > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > > > >
> > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > > >>>> Sent: Friday, 17 June 2022 11.46
> > > > >>>>
> > > > >>>> This patch supports escape special characters (including:
> > > > \",\\,/,\b,
> > > > >>>> /f,/n,/r,/t) when telemetry string.
> > > > >>>> This patch is used to support telemetry xxx-dump commands which
> > > > the
> > > > >>>> string may include special characters.
> > > > >>>>
> > > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > > >>>> ---
> > > > >>>>  lib/telemetry/telemetry.c | 96
> > > > +++++++++++++++++++++++++++++++++++++--
> > > > >>>>  1 file changed, 93 insertions(+), 3 deletions(-)
> > > > >>>>
> > > > >>>> diff --git a/lib/telemetry/telemetry.c
> > > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > > > >>>> --- a/lib/telemetry/telemetry.c
> > > > >>>> +++ b/lib/telemetry/telemetry.c
> > > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > > > *d,
> > > > >>>> char *out_buf, size_t buf_len)
> > > > >>>>        return used;
> > > > >>>>  }
> > > > >>>>
> > > > >>>> +static bool
> > > > >>>> +json_is_special_char(char ch)
> > > > >>>> +{
> > > > >>>> +      static unsigned char is_spec[256] = { 0 };
> > > > >>>> +      static bool init_once;
> > > > >>>> +
> > > > >>>> +      if (!init_once) {
> > > > >>>> +              is_spec['\"'] = 1;
> > > > >>>> +              is_spec['\\'] = 1;
> > > > >>>> +              is_spec['/'] = 1;
> > > > >>>> +              is_spec['\b'] = 1;
> > > > >>>> +              is_spec['\f'] = 1;
> > > > >>>> +              is_spec['\n'] = 1;
> > > > >>>> +              is_spec['\r'] = 1;
> > > > >>>> +              is_spec['\t'] = 1;
> > > > >>>> +              init_once = true;
> > > > >>>> +      }
> > > > >>>> +
> > > > >>>> +      return (bool)is_spec[(unsigned char)ch]; }
> > > > >>
> > > > >> According to the json spec at [1], the characters that need to be
> > > > escaped
> > > > >> are:
> > > > >> a) any characters <0x20
> > > > >> b) inverted commas/quote character \"
> > > > >> c) the "reverse solidus character", better known to you and I as
> > > > >> the back-slash.
> > > > >>
> > > > >> Therefore, I think this table generation could be simplified, but
> > > > also
> > > > >> expanded using this. For completeness we should also see about
> > > > handling all
> > > > >> control characters if they are encountered.
> > > > >>
> > > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > > > >>
> > > > >> /Bruce
> > > > >
> > > > > Since it is trivial could be initializer?
> > > > >
> > > > > static const uint8_t is_spec[256] = {
> > > > >    [0 ... 0x20] = 1,
> > > > >    ['\"' ] = 1,
> > > > >    ['\\' ] = 1,
> > > > >    ['/'] = 1,
> > > > >
> > > > > etc
> > > > >
> > > > > Or we could change the telemetry API to disallow control characters?
> > > >
> > > > I was thinking about converting 0~0x20, but I don't think there's a
> > > > scenario.
> > > >
> > > > I prefer change the telemetry API to disallow control characters, and
> > > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > > > will returns an error.
> > >
> > > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
> > >
> > > So we need to define exactly what the STRING type contains.
> > >
> > > I hope we can all agree that control characters should be disallowed.
> > >
> > > The more complicated question is: Do we want to use the ASCII character set
> > > only, or do we want to use UTF-8 encoded Unicode?
> > >
> > > Personally, think UTF-8 encoded Unicode is more future proof, and would
> > > vote for that.
> > >
> > > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> > > another data type for UTF-8 strings.
> > >
> > > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> > > SNMP MIBs.
> > >
> > [CP]
> > 
> > Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).
> > 
> > [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > 
> > > >
> > > > So I think we could add declaring and checking functions to make sure
> > > > telemetry string do not allow control characters.
> > [CP]
> > 
> > I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.
> > 
> > <snip>
> > 
> > In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.
> >
> 
> I'm hoping to have some time to try and prototype this myself soon, and
> send out a draft patch to this mailing list for consideration.
> 
Here is an RFC of my current prototype of this:

http://patches.dpdk.org/project/dpdk/list/?series=23739

Feedback welcome.

Regards,
/Bruce
  

Patch

diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
index c6fd03a5ab..0f762f633e 100644
--- a/lib/telemetry/telemetry.c
+++ b/lib/telemetry/telemetry.c
@@ -215,6 +215,94 @@  container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len)
 	return used;
 }
 
+static bool
+json_is_special_char(char ch)
+{
+	static unsigned char is_spec[256] = { 0 };
+	static bool init_once;
+
+	if (!init_once) {
+		is_spec['\"'] = 1;
+		is_spec['\\'] = 1;
+		is_spec['/'] = 1;
+		is_spec['\b'] = 1;
+		is_spec['\f'] = 1;
+		is_spec['\n'] = 1;
+		is_spec['\r'] = 1;
+		is_spec['\t'] = 1;
+		init_once = true;
+	}
+
+	return (bool)is_spec[(unsigned char)ch];
+}
+
+static size_t
+json_escape_special_char(char *buf, const char ch)
+{
+	size_t used = 0;
+
+	switch (ch) {
+	case '\"':
+		buf[used++] = '\\';
+		buf[used++] = '\"';
+		break;
+	case '\\':
+		buf[used++] = '\\';
+		buf[used++] = '\\';
+		break;
+	case '/':
+		buf[used++] = '\\';
+		buf[used++] = '/';
+		break;
+	case '\b':
+		buf[used++] = '\\';
+		buf[used++] = 'b';
+		break;
+	case '\f':
+		buf[used++] = '\\';
+		buf[used++] = 'f';
+		break;
+	case '\n':
+		buf[used++] = '\\';
+		buf[used++] = 'n';
+		break;
+	case '\r':
+		buf[used++] = '\\';
+		buf[used++] = 'r';
+		break;
+	case '\t':
+		buf[used++] = '\\';
+		buf[used++] = 't';
+		break;
+	default:
+		break;
+	}
+
+	return used;
+}
+
+static size_t
+json_format_string(char *buf, size_t len, const char *str)
+{
+	size_t used = 0;
+
+	while (*str) {
+		if (unlikely(len < used + 2)) {
+			TMTY_LOG(WARNING, "Insufficient buffer when json format string\n");
+			break;
+		}
+
+		if (json_is_special_char(*str))
+			used += json_escape_special_char(buf + used, *str);
+		else
+			buf[used++] = *str;
+
+		str++;
+	}
+
+	return used;
+}
+
 static void
 output_json(const char *cmd, const struct rte_tel_data *d, int s)
 {
@@ -232,9 +320,11 @@  output_json(const char *cmd, const struct rte_tel_data *d, int s)
 				MAX_CMD_LEN, cmd ? cmd : "none");
 		break;
 	case RTE_TEL_STRING:
-		used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}",
-				MAX_CMD_LEN, cmd,
-				RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
+		used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
+				MAX_CMD_LEN, cmd);
+		used += json_format_string(out_buf + used,
+				sizeof(out_buf) - used - 3, d->data.str);
+		used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}");
 		break;
 	case RTE_TEL_DICT:
 		prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":",