Message ID | 20220617094624.17578-2-fengchengwen@huawei.com (mailing list archive) |
---|---|
State | New |
Delegated to: | Thomas Monjalon |
Headers | show |
Series | support telemetry dump | expand |
Context | Check | Description |
---|---|---|
ci/checkpatch | success | coding style OK |
> From: Chengwen Feng [mailto:fengchengwen@huawei.com] > Sent: Friday, 17 June 2022 11.46 > > This patch supports escape special characters (including: \",\\,/,\b, > /f,/n,/r,/t) when telemetry string. > This patch is used to support telemetry xxx-dump commands which the > string may include special characters. > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > --- > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 93 insertions(+), 3 deletions(-) > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c > index c6fd03a5ab..0f762f633e 100644 > --- a/lib/telemetry/telemetry.c > +++ b/lib/telemetry/telemetry.c > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, > char *out_buf, size_t buf_len) > return used; > } > > +static bool > +json_is_special_char(char ch) > +{ > + static unsigned char is_spec[256] = { 0 }; > + static bool init_once; > + > + if (!init_once) { > + is_spec['\"'] = 1; > + is_spec['\\'] = 1; > + is_spec['/'] = 1; > + is_spec['\b'] = 1; > + is_spec['\f'] = 1; > + is_spec['\n'] = 1; > + is_spec['\r'] = 1; > + is_spec['\t'] = 1; > + init_once = true; > + } > + > + return (bool)is_spec[(unsigned char)ch]; > +} Here's a suggestion for simplifying the code: Remove json_is_special_char(), and update json_escape_special_char() and json_format_string() as follows: > + > +static size_t > +json_escape_special_char(char *buf, const char ch) Consider making this function inline. > +{ > + size_t used = 0; > + > + switch (ch) { > + case '\"': > + buf[used++] = '\\'; > + buf[used++] = '\"'; > + break; > + case '\\': > + buf[used++] = '\\'; > + buf[used++] = '\\'; > + break; > + case '/': > + buf[used++] = '\\'; > + buf[used++] = '/'; > + break; > + case '\b': > + buf[used++] = '\\'; > + buf[used++] = 'b'; > + break; > + case '\f': > + buf[used++] = '\\'; > + buf[used++] = 'f'; > + break; > + case '\n': > + buf[used++] = '\\'; > + buf[used++] = 'n'; > + break; > + case '\r': > + buf[used++] = '\\'; > + buf[used++] = 'r'; > + break; > + case '\t': > + buf[used++] = '\\'; > + buf[used++] = 't'; > + break; > + default: Handle non-escaped characters in the default case here instead: + buf[used++] = ch; > + break; > + } > + > + return used; > +} > + > +static size_t > +json_format_string(char *buf, size_t len, const char *str) > +{ > + size_t used = 0; > + > + while (*str) { > + if (unlikely(len < used + 2)) { > + TMTY_LOG(WARNING, "Insufficient buffer when json > format string\n"); > + break; > + } > + -- replace: > + if (json_is_special_char(*str)) > + used += json_escape_special_char(buf + used, *str); > + else > + buf[used++] = *str; > + > + str++; -- by: + used += json_escape_special_char(buf + used, *str++); -- End of suggestion. Feel free to use it or not. :-) > + } > + > + return used; > +} > + > static void > output_json(const char *cmd, const struct rte_tel_data *d, int s) > { > @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct > rte_tel_data *d, int s) > MAX_CMD_LEN, cmd ? cmd : "none"); > break; > case RTE_TEL_STRING: > - used = snprintf(out_buf, sizeof(out_buf), > "{\"%.*s\":\"%.*s\"}", > - MAX_CMD_LEN, cmd, > - RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str); > + used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"", > + MAX_CMD_LEN, cmd); > + used += json_format_string(out_buf + used, > + sizeof(out_buf) - used - 3, d->data.str); > + used += snprintf(out_buf + used, sizeof(out_buf) - used, > "\"}"); I looked for missing 0-termination in json_format_string(), but that is not required due to the immediately following snprintf(). > break; > case RTE_TEL_DICT: > prefix_used = snprintf(out_buf, sizeof(out_buf), > "{\"%.*s\":", > -- > 2.33.0 > If you want to make it generic, these four cases in output_json() also need to JSON encode the strings: case RTE_TEL_DICT: case RTE_TEL_STRING_VAL: ... case RTE_TEL_CONTAINER: ... (strings only) case RTE_TEL_ARRAY_STRING: if (d->type == RTE_TEL_ARRAY_STRING) ... else if (d->type == RTE_TEL_ARRAY_CONTAINER) ... (strings only) However, JSON encoding of strings inside arrays and containers is not required for the dump purposes addressed by this patch series, so I consider this patch complete without it. No need to add "feature creep" to this series. Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > > From: Chengwen Feng [mailto:fengchengwen@huawei.com] > > Sent: Friday, 17 June 2022 11.46 > > > > This patch supports escape special characters (including: \",\\,/,\b, > > /f,/n,/r,/t) when telemetry string. > > This patch is used to support telemetry xxx-dump commands which the > > string may include special characters. > > > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > > --- > > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 93 insertions(+), 3 deletions(-) > > > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c > > index c6fd03a5ab..0f762f633e 100644 > > --- a/lib/telemetry/telemetry.c > > +++ b/lib/telemetry/telemetry.c > > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, > > char *out_buf, size_t buf_len) > > return used; > > } > > > > +static bool > > +json_is_special_char(char ch) > > +{ > > + static unsigned char is_spec[256] = { 0 }; > > + static bool init_once; > > + > > + if (!init_once) { > > + is_spec['\"'] = 1; > > + is_spec['\\'] = 1; > > + is_spec['/'] = 1; > > + is_spec['\b'] = 1; > > + is_spec['\f'] = 1; > > + is_spec['\n'] = 1; > > + is_spec['\r'] = 1; > > + is_spec['\t'] = 1; > > + init_once = true; > > + } > > + > > + return (bool)is_spec[(unsigned char)ch]; > > +} According to the json spec at [1], the characters that need to be escaped are: a) any characters <0x20 b) inverted commas/quote character \" c) the "reverse solidus character", better known to you and I as the back-slash. Therefore, I think this table generation could be simplified, but also expanded using this. For completeness we should also see about handling all control characters if they are encountered. [1] https://www.rfc-editor.org/rfc/rfc8259.txt /Bruce
On Fri, Jun 17, 2022 at 05:46:20PM +0800, Chengwen Feng wrote: > This patch supports escape special characters (including: \",\\,/,\b, > /f,/n,/r,/t) when telemetry string. > This patch is used to support telemetry xxx-dump commands which the > string may include special characters. > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > --- > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 93 insertions(+), 3 deletions(-) > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c > index c6fd03a5ab..0f762f633e 100644 > --- a/lib/telemetry/telemetry.c > +++ b/lib/telemetry/telemetry.c > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len) > return used; > } > > +static bool > +json_is_special_char(char ch) > +{ > + static unsigned char is_spec[256] = { 0 }; > + static bool init_once; > + > + if (!init_once) { > + is_spec['\"'] = 1; > + is_spec['\\'] = 1; > + is_spec['/'] = 1; > + is_spec['\b'] = 1; > + is_spec['\f'] = 1; > + is_spec['\n'] = 1; > + is_spec['\r'] = 1; > + is_spec['\t'] = 1; > + init_once = true; > + } > + > + return (bool)is_spec[(unsigned char)ch]; > +} > + > +static size_t > +json_escape_special_char(char *buf, const char ch) > +{ > + size_t used = 0; > + > + switch (ch) { > + case '\"': > + buf[used++] = '\\'; > + buf[used++] = '\"'; > + break; > + case '\\': > + buf[used++] = '\\'; > + buf[used++] = '\\'; > + break; > + case '/': > + buf[used++] = '\\'; > + buf[used++] = '/'; > + break; > + case '\b': > + buf[used++] = '\\'; > + buf[used++] = 'b'; > + break; > + case '\f': > + buf[used++] = '\\'; > + buf[used++] = 'f'; > + break; > + case '\n': > + buf[used++] = '\\'; > + buf[used++] = 'n'; > + break; > + case '\r': > + buf[used++] = '\\'; > + buf[used++] = 'r'; > + break; > + case '\t': > + buf[used++] = '\\'; > + buf[used++] = 't'; > + break; > + default: > + break; > + } > + > + return used; > +} > + > +static size_t > +json_format_string(char *buf, size_t len, const char *str) > +{ > + size_t used = 0; > + > + while (*str) { > + if (unlikely(len < used + 2)) { > + TMTY_LOG(WARNING, "Insufficient buffer when json format string\n"); > + break; > + } > + > + if (json_is_special_char(*str)) > + used += json_escape_special_char(buf + used, *str); > + else > + buf[used++] = *str; > + > + str++; > + } > + > + return used; > +} > + > static void > output_json(const char *cmd, const struct rte_tel_data *d, int s) > { > @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct rte_tel_data *d, int s) > MAX_CMD_LEN, cmd ? cmd : "none"); > break; > case RTE_TEL_STRING: > - used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}", > - MAX_CMD_LEN, cmd, > - RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str); > + used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"", > + MAX_CMD_LEN, cmd); > + used += json_format_string(out_buf + used, > + sizeof(out_buf) - used - 3, d->data.str); > + used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}"); > break; > case RTE_TEL_DICT: > prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":", > -- I think it might be worthwhile to write a general json_str_printf function to do the snprintf and the escaping in one go. Then that might be more able to be used in other places where we output strings. /Bruce
On Fri, 17 Jun 2022 12:25:04 +0100 Bruce Richardson <bruce.richardson@intel.com> wrote: > On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > > > From: Chengwen Feng [mailto:fengchengwen@huawei.com] > > > Sent: Friday, 17 June 2022 11.46 > > > > > > This patch supports escape special characters (including: \",\\,/,\b, > > > /f,/n,/r,/t) when telemetry string. > > > This patch is used to support telemetry xxx-dump commands which the > > > string may include special characters. > > > > > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > > > --- > > > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- > > > 1 file changed, 93 insertions(+), 3 deletions(-) > > > > > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c > > > index c6fd03a5ab..0f762f633e 100644 > > > --- a/lib/telemetry/telemetry.c > > > +++ b/lib/telemetry/telemetry.c > > > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, > > > char *out_buf, size_t buf_len) > > > return used; > > > } > > > > > > +static bool > > > +json_is_special_char(char ch) > > > +{ > > > + static unsigned char is_spec[256] = { 0 }; > > > + static bool init_once; > > > + > > > + if (!init_once) { > > > + is_spec['\"'] = 1; > > > + is_spec['\\'] = 1; > > > + is_spec['/'] = 1; > > > + is_spec['\b'] = 1; > > > + is_spec['\f'] = 1; > > > + is_spec['\n'] = 1; > > > + is_spec['\r'] = 1; > > > + is_spec['\t'] = 1; > > > + init_once = true; > > > + } > > > + > > > + return (bool)is_spec[(unsigned char)ch]; > > > +} > > According to the json spec at [1], the characters that need to be escaped > are: > a) any characters <0x20 > b) inverted commas/quote character \" > c) the "reverse solidus character", better known to you and I as the > back-slash. > > Therefore, I think this table generation could be simplified, but also > expanded using this. For completeness we should also see about handling all > control characters if they are encountered. > > [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > /Bruce Since it is trivial could be initializer? static const uint8_t is_spec[256] = { [0 ... 0x20] = 1, ['\"' ] = 1, ['\\' ] = 1, ['/'] = 1, etc Or we could change the telemetry API to disallow control characters?
On 2022/6/18 1:05, Stephen Hemminger wrote: > On Fri, 17 Jun 2022 12:25:04 +0100 > Bruce Richardson <bruce.richardson@intel.com> wrote: > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com] >>>> Sent: Friday, 17 June 2022 11.46 >>>> >>>> This patch supports escape special characters (including: \",\\,/,\b, >>>> /f,/n,/r,/t) when telemetry string. >>>> This patch is used to support telemetry xxx-dump commands which the >>>> string may include special characters. >>>> >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> >>>> --- >>>> lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- >>>> 1 file changed, 93 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c >>>> index c6fd03a5ab..0f762f633e 100644 >>>> --- a/lib/telemetry/telemetry.c >>>> +++ b/lib/telemetry/telemetry.c >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, >>>> char *out_buf, size_t buf_len) >>>> return used; >>>> } >>>> >>>> +static bool >>>> +json_is_special_char(char ch) >>>> +{ >>>> + static unsigned char is_spec[256] = { 0 }; >>>> + static bool init_once; >>>> + >>>> + if (!init_once) { >>>> + is_spec['\"'] = 1; >>>> + is_spec['\\'] = 1; >>>> + is_spec['/'] = 1; >>>> + is_spec['\b'] = 1; >>>> + is_spec['\f'] = 1; >>>> + is_spec['\n'] = 1; >>>> + is_spec['\r'] = 1; >>>> + is_spec['\t'] = 1; >>>> + init_once = true; >>>> + } >>>> + >>>> + return (bool)is_spec[(unsigned char)ch]; >>>> +} >> >> According to the json spec at [1], the characters that need to be escaped >> are: >> a) any characters <0x20 >> b) inverted commas/quote character \" >> c) the "reverse solidus character", better known to you and I as the >> back-slash. >> >> Therefore, I think this table generation could be simplified, but also >> expanded using this. For completeness we should also see about handling all >> control characters if they are encountered. >> >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt >> >> /Bruce > > Since it is trivial could be initializer? > > static const uint8_t is_spec[256] = { > [0 ... 0x20] = 1, > ['\"' ] = 1, > ['\\' ] = 1, > ['/'] = 1, > > etc > > Or we could change the telemetry API to disallow control characters? I was thinking about converting 0~0x20, but I don't think there's a scenario. I prefer change the telemetry API to disallow control characters, and this may not be a violation of the ABI, if yes, the dpdk-telemetry.py will returns an error. So I think we could add declaring and checking functions to make sure telemetry string do not allow control characters. > > > . >
+CC: Ciara Power, Telemetry library maintainer > From: fengchengwen [mailto:fengchengwen@huawei.com] > Sent: Saturday, 18 June 2022 05.52 > > On 2022/6/18 1:05, Stephen Hemminger wrote: > > On Fri, 17 Jun 2022 12:25:04 +0100 > > Bruce Richardson <bruce.richardson@intel.com> wrote: > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com] > >>>> Sent: Friday, 17 June 2022 11.46 > >>>> > >>>> This patch supports escape special characters (including: > \",\\,/,\b, > >>>> /f,/n,/r,/t) when telemetry string. > >>>> This patch is used to support telemetry xxx-dump commands which > the > >>>> string may include special characters. > >>>> > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > >>>> --- > >>>> lib/telemetry/telemetry.c | 96 > +++++++++++++++++++++++++++++++++++++-- > >>>> 1 file changed, 93 insertions(+), 3 deletions(-) > >>>> > >>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c > >>>> index c6fd03a5ab..0f762f633e 100644 > >>>> --- a/lib/telemetry/telemetry.c > >>>> +++ b/lib/telemetry/telemetry.c > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data > *d, > >>>> char *out_buf, size_t buf_len) > >>>> return used; > >>>> } > >>>> > >>>> +static bool > >>>> +json_is_special_char(char ch) > >>>> +{ > >>>> + static unsigned char is_spec[256] = { 0 }; > >>>> + static bool init_once; > >>>> + > >>>> + if (!init_once) { > >>>> + is_spec['\"'] = 1; > >>>> + is_spec['\\'] = 1; > >>>> + is_spec['/'] = 1; > >>>> + is_spec['\b'] = 1; > >>>> + is_spec['\f'] = 1; > >>>> + is_spec['\n'] = 1; > >>>> + is_spec['\r'] = 1; > >>>> + is_spec['\t'] = 1; > >>>> + init_once = true; > >>>> + } > >>>> + > >>>> + return (bool)is_spec[(unsigned char)ch]; > >>>> +} > >> > >> According to the json spec at [1], the characters that need to be > escaped > >> are: > >> a) any characters <0x20 > >> b) inverted commas/quote character \" > >> c) the "reverse solidus character", better known to you and I as the > >> back-slash. > >> > >> Therefore, I think this table generation could be simplified, but > also > >> expanded using this. For completeness we should also see about > handling all > >> control characters if they are encountered. > >> > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt > >> > >> /Bruce > > > > Since it is trivial could be initializer? > > > > static const uint8_t is_spec[256] = { > > [0 ... 0x20] = 1, > > ['\"' ] = 1, > > ['\\' ] = 1, > > ['/'] = 1, > > > > etc > > > > Or we could change the telemetry API to disallow control characters? > > I was thinking about converting 0~0x20, but I don't think there's a > scenario. > > I prefer change the telemetry API to disallow control characters, and > this may not > be a violation of the ABI, if yes, the dpdk-telemetry.py will returns > an error. I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB. So we need to define exactly what the STRING type contains. I hope we can all agree that control characters should be disallowed. The more complicated question is: Do we want to use the ASCII character set only, or do we want to use UTF-8 encoded Unicode? Personally, think UTF-8 encoded Unicode is more future proof, and would vote for that. But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce another data type for UTF-8 strings. UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many SNMP MIBs. > > So I think we could add declaring and checking functions to make sure > telemetry string > do not allow control characters. Input validation (when storing a string in the telemetry database) has a performance cost, so it could be a compile time debug option, like the memory cookies and mbuf integrity checks. Just a thought.
Hi folks, > -----Original Message----- > From: Morten Brørup <mb@smartsharesystems.com> > Sent: Saturday 18 June 2022 10:59 > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger > <stephen@networkplumber.org>; Richardson, Bruce > <bruce.richardson@intel.com> > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru; > jerinj@marvell.com; sachin.saxena@oss.nxp.com; > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara > <ciara.power@intel.com> > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string > > +CC: Ciara Power, Telemetry library maintainer > > > From: fengchengwen [mailto:fengchengwen@huawei.com] > > Sent: Saturday, 18 June 2022 05.52 > > > > On 2022/6/18 1:05, Stephen Hemminger wrote: > > > On Fri, 17 Jun 2022 12:25:04 +0100 > > > Bruce Richardson <bruce.richardson@intel.com> wrote: > > > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com] > > >>>> Sent: Friday, 17 June 2022 11.46 > > >>>> > > >>>> This patch supports escape special characters (including: > > \",\\,/,\b, > > >>>> /f,/n,/r,/t) when telemetry string. > > >>>> This patch is used to support telemetry xxx-dump commands which > > the > > >>>> string may include special characters. > > >>>> > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > > >>>> --- > > >>>> lib/telemetry/telemetry.c | 96 > > +++++++++++++++++++++++++++++++++++++-- > > >>>> 1 file changed, 93 insertions(+), 3 deletions(-) > > >>>> > > >>>> diff --git a/lib/telemetry/telemetry.c > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644 > > >>>> --- a/lib/telemetry/telemetry.c > > >>>> +++ b/lib/telemetry/telemetry.c > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data > > *d, > > >>>> char *out_buf, size_t buf_len) > > >>>> return used; > > >>>> } > > >>>> > > >>>> +static bool > > >>>> +json_is_special_char(char ch) > > >>>> +{ > > >>>> + static unsigned char is_spec[256] = { 0 }; > > >>>> + static bool init_once; > > >>>> + > > >>>> + if (!init_once) { > > >>>> + is_spec['\"'] = 1; > > >>>> + is_spec['\\'] = 1; > > >>>> + is_spec['/'] = 1; > > >>>> + is_spec['\b'] = 1; > > >>>> + is_spec['\f'] = 1; > > >>>> + is_spec['\n'] = 1; > > >>>> + is_spec['\r'] = 1; > > >>>> + is_spec['\t'] = 1; > > >>>> + init_once = true; > > >>>> + } > > >>>> + > > >>>> + return (bool)is_spec[(unsigned char)ch]; } > > >> > > >> According to the json spec at [1], the characters that need to be > > escaped > > >> are: > > >> a) any characters <0x20 > > >> b) inverted commas/quote character \" > > >> c) the "reverse solidus character", better known to you and I as > > >> the back-slash. > > >> > > >> Therefore, I think this table generation could be simplified, but > > also > > >> expanded using this. For completeness we should also see about > > handling all > > >> control characters if they are encountered. > > >> > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > >> > > >> /Bruce > > > > > > Since it is trivial could be initializer? > > > > > > static const uint8_t is_spec[256] = { > > > [0 ... 0x20] = 1, > > > ['\"' ] = 1, > > > ['\\' ] = 1, > > > ['/'] = 1, > > > > > > etc > > > > > > Or we could change the telemetry API to disallow control characters? > > > > I was thinking about converting 0~0x20, but I don't think there's a > > scenario. > > > > I prefer change the telemetry API to disallow control characters, and > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py > > will returns an error. > > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB. > > So we need to define exactly what the STRING type contains. > > I hope we can all agree that control characters should be disallowed. > > The more complicated question is: Do we want to use the ASCII character set > only, or do we want to use UTF-8 encoded Unicode? > > Personally, think UTF-8 encoded Unicode is more future proof, and would > vote for that. > > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce > another data type for UTF-8 strings. > > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many > SNMP MIBs. > [CP] Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1). [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > > > So I think we could add declaring and checking functions to make sure > > telemetry string do not allow control characters. [CP] I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example. <snip> In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in. Thanks, Ciara
On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote: > Hi folks, > > > -----Original Message----- > > From: Morten Brørup <mb@smartsharesystems.com> > > Sent: Saturday 18 June 2022 10:59 > > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger > > <stephen@networkplumber.org>; Richardson, Bruce > > <bruce.richardson@intel.com> > > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin > > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru; > > jerinj@marvell.com; sachin.saxena@oss.nxp.com; > > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara > > <ciara.power@intel.com> > > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string > > > > +CC: Ciara Power, Telemetry library maintainer > > > > > From: fengchengwen [mailto:fengchengwen@huawei.com] > > > Sent: Saturday, 18 June 2022 05.52 > > > > > > On 2022/6/18 1:05, Stephen Hemminger wrote: > > > > On Fri, 17 Jun 2022 12:25:04 +0100 > > > > Bruce Richardson <bruce.richardson@intel.com> wrote: > > > > > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com] > > > >>>> Sent: Friday, 17 June 2022 11.46 > > > >>>> > > > >>>> This patch supports escape special characters (including: > > > \",\\,/,\b, > > > >>>> /f,/n,/r,/t) when telemetry string. > > > >>>> This patch is used to support telemetry xxx-dump commands which > > > the > > > >>>> string may include special characters. > > > >>>> > > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > > > >>>> --- > > > >>>> lib/telemetry/telemetry.c | 96 > > > +++++++++++++++++++++++++++++++++++++-- > > > >>>> 1 file changed, 93 insertions(+), 3 deletions(-) > > > >>>> > > > >>>> diff --git a/lib/telemetry/telemetry.c > > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644 > > > >>>> --- a/lib/telemetry/telemetry.c > > > >>>> +++ b/lib/telemetry/telemetry.c > > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data > > > *d, > > > >>>> char *out_buf, size_t buf_len) > > > >>>> return used; > > > >>>> } > > > >>>> > > > >>>> +static bool > > > >>>> +json_is_special_char(char ch) > > > >>>> +{ > > > >>>> + static unsigned char is_spec[256] = { 0 }; > > > >>>> + static bool init_once; > > > >>>> + > > > >>>> + if (!init_once) { > > > >>>> + is_spec['\"'] = 1; > > > >>>> + is_spec['\\'] = 1; > > > >>>> + is_spec['/'] = 1; > > > >>>> + is_spec['\b'] = 1; > > > >>>> + is_spec['\f'] = 1; > > > >>>> + is_spec['\n'] = 1; > > > >>>> + is_spec['\r'] = 1; > > > >>>> + is_spec['\t'] = 1; > > > >>>> + init_once = true; > > > >>>> + } > > > >>>> + > > > >>>> + return (bool)is_spec[(unsigned char)ch]; } > > > >> > > > >> According to the json spec at [1], the characters that need to be > > > escaped > > > >> are: > > > >> a) any characters <0x20 > > > >> b) inverted commas/quote character \" > > > >> c) the "reverse solidus character", better known to you and I as > > > >> the back-slash. > > > >> > > > >> Therefore, I think this table generation could be simplified, but > > > also > > > >> expanded using this. For completeness we should also see about > > > handling all > > > >> control characters if they are encountered. > > > >> > > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > > >> > > > >> /Bruce > > > > > > > > Since it is trivial could be initializer? > > > > > > > > static const uint8_t is_spec[256] = { > > > > [0 ... 0x20] = 1, > > > > ['\"' ] = 1, > > > > ['\\' ] = 1, > > > > ['/'] = 1, > > > > > > > > etc > > > > > > > > Or we could change the telemetry API to disallow control characters? > > > > > > I was thinking about converting 0~0x20, but I don't think there's a > > > scenario. > > > > > > I prefer change the telemetry API to disallow control characters, and > > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py > > > will returns an error. > > > > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB. > > > > So we need to define exactly what the STRING type contains. > > > > I hope we can all agree that control characters should be disallowed. > > > > The more complicated question is: Do we want to use the ASCII character set > > only, or do we want to use UTF-8 encoded Unicode? > > > > Personally, think UTF-8 encoded Unicode is more future proof, and would > > vote for that. > > > > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce > > another data type for UTF-8 strings. > > > > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many > > SNMP MIBs. > > > [CP] > > Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1). > > [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > > > > > > So I think we could add declaring and checking functions to make sure > > > telemetry string do not allow control characters. > [CP] > > I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example. > > <snip> > > In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in. > I'm hoping to have some time to try and prototype this myself soon, and send out a draft patch to this mailing list for consideration. /Bruce
On Wed, Jun 22, 2022 at 10:19:48AM +0100, Bruce Richardson wrote: > On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote: > > Hi folks, > > > > > -----Original Message----- > > > From: Morten Brørup <mb@smartsharesystems.com> > > > Sent: Saturday 18 June 2022 10:59 > > > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger > > > <stephen@networkplumber.org>; Richardson, Bruce > > > <bruce.richardson@intel.com> > > > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin > > > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru; > > > jerinj@marvell.com; sachin.saxena@oss.nxp.com; > > > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara > > > <ciara.power@intel.com> > > > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string > > > > > > +CC: Ciara Power, Telemetry library maintainer > > > > > > > From: fengchengwen [mailto:fengchengwen@huawei.com] > > > > Sent: Saturday, 18 June 2022 05.52 > > > > > > > > On 2022/6/18 1:05, Stephen Hemminger wrote: > > > > > On Fri, 17 Jun 2022 12:25:04 +0100 > > > > > Bruce Richardson <bruce.richardson@intel.com> wrote: > > > > > > > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote: > > > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com] > > > > >>>> Sent: Friday, 17 June 2022 11.46 > > > > >>>> > > > > >>>> This patch supports escape special characters (including: > > > > \",\\,/,\b, > > > > >>>> /f,/n,/r,/t) when telemetry string. > > > > >>>> This patch is used to support telemetry xxx-dump commands which > > > > the > > > > >>>> string may include special characters. > > > > >>>> > > > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> > > > > >>>> --- > > > > >>>> lib/telemetry/telemetry.c | 96 > > > > +++++++++++++++++++++++++++++++++++++-- > > > > >>>> 1 file changed, 93 insertions(+), 3 deletions(-) > > > > >>>> > > > > >>>> diff --git a/lib/telemetry/telemetry.c > > > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644 > > > > >>>> --- a/lib/telemetry/telemetry.c > > > > >>>> +++ b/lib/telemetry/telemetry.c > > > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data > > > > *d, > > > > >>>> char *out_buf, size_t buf_len) > > > > >>>> return used; > > > > >>>> } > > > > >>>> > > > > >>>> +static bool > > > > >>>> +json_is_special_char(char ch) > > > > >>>> +{ > > > > >>>> + static unsigned char is_spec[256] = { 0 }; > > > > >>>> + static bool init_once; > > > > >>>> + > > > > >>>> + if (!init_once) { > > > > >>>> + is_spec['\"'] = 1; > > > > >>>> + is_spec['\\'] = 1; > > > > >>>> + is_spec['/'] = 1; > > > > >>>> + is_spec['\b'] = 1; > > > > >>>> + is_spec['\f'] = 1; > > > > >>>> + is_spec['\n'] = 1; > > > > >>>> + is_spec['\r'] = 1; > > > > >>>> + is_spec['\t'] = 1; > > > > >>>> + init_once = true; > > > > >>>> + } > > > > >>>> + > > > > >>>> + return (bool)is_spec[(unsigned char)ch]; } > > > > >> > > > > >> According to the json spec at [1], the characters that need to be > > > > escaped > > > > >> are: > > > > >> a) any characters <0x20 > > > > >> b) inverted commas/quote character \" > > > > >> c) the "reverse solidus character", better known to you and I as > > > > >> the back-slash. > > > > >> > > > > >> Therefore, I think this table generation could be simplified, but > > > > also > > > > >> expanded using this. For completeness we should also see about > > > > handling all > > > > >> control characters if they are encountered. > > > > >> > > > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > > > >> > > > > >> /Bruce > > > > > > > > > > Since it is trivial could be initializer? > > > > > > > > > > static const uint8_t is_spec[256] = { > > > > > [0 ... 0x20] = 1, > > > > > ['\"' ] = 1, > > > > > ['\\' ] = 1, > > > > > ['/'] = 1, > > > > > > > > > > etc > > > > > > > > > > Or we could change the telemetry API to disallow control characters? > > > > > > > > I was thinking about converting 0~0x20, but I don't think there's a > > > > scenario. > > > > > > > > I prefer change the telemetry API to disallow control characters, and > > > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py > > > > will returns an error. > > > > > > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB. > > > > > > So we need to define exactly what the STRING type contains. > > > > > > I hope we can all agree that control characters should be disallowed. > > > > > > The more complicated question is: Do we want to use the ASCII character set > > > only, or do we want to use UTF-8 encoded Unicode? > > > > > > Personally, think UTF-8 encoded Unicode is more future proof, and would > > > vote for that. > > > > > > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce > > > another data type for UTF-8 strings. > > > > > > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many > > > SNMP MIBs. > > > > > [CP] > > > > Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1). > > > > [1] https://www.rfc-editor.org/rfc/rfc8259.txt > > > > > > > > > > So I think we could add declaring and checking functions to make sure > > > > telemetry string do not allow control characters. > > [CP] > > > > I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example. > > > > <snip> > > > > In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in. > > > > I'm hoping to have some time to try and prototype this myself soon, and > send out a draft patch to this mailing list for consideration. > Here is an RFC of my current prototype of this: http://patches.dpdk.org/project/dpdk/list/?series=23739 Feedback welcome. Regards, /Bruce
diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644 --- a/lib/telemetry/telemetry.c +++ b/lib/telemetry/telemetry.c @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len) return used; } +static bool +json_is_special_char(char ch) +{ + static unsigned char is_spec[256] = { 0 }; + static bool init_once; + + if (!init_once) { + is_spec['\"'] = 1; + is_spec['\\'] = 1; + is_spec['/'] = 1; + is_spec['\b'] = 1; + is_spec['\f'] = 1; + is_spec['\n'] = 1; + is_spec['\r'] = 1; + is_spec['\t'] = 1; + init_once = true; + } + + return (bool)is_spec[(unsigned char)ch]; +} + +static size_t +json_escape_special_char(char *buf, const char ch) +{ + size_t used = 0; + + switch (ch) { + case '\"': + buf[used++] = '\\'; + buf[used++] = '\"'; + break; + case '\\': + buf[used++] = '\\'; + buf[used++] = '\\'; + break; + case '/': + buf[used++] = '\\'; + buf[used++] = '/'; + break; + case '\b': + buf[used++] = '\\'; + buf[used++] = 'b'; + break; + case '\f': + buf[used++] = '\\'; + buf[used++] = 'f'; + break; + case '\n': + buf[used++] = '\\'; + buf[used++] = 'n'; + break; + case '\r': + buf[used++] = '\\'; + buf[used++] = 'r'; + break; + case '\t': + buf[used++] = '\\'; + buf[used++] = 't'; + break; + default: + break; + } + + return used; +} + +static size_t +json_format_string(char *buf, size_t len, const char *str) +{ + size_t used = 0; + + while (*str) { + if (unlikely(len < used + 2)) { + TMTY_LOG(WARNING, "Insufficient buffer when json format string\n"); + break; + } + + if (json_is_special_char(*str)) + used += json_escape_special_char(buf + used, *str); + else + buf[used++] = *str; + + str++; + } + + return used; +} + static void output_json(const char *cmd, const struct rte_tel_data *d, int s) { @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct rte_tel_data *d, int s) MAX_CMD_LEN, cmd ? cmd : "none"); break; case RTE_TEL_STRING: - used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}", - MAX_CMD_LEN, cmd, - RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str); + used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"", + MAX_CMD_LEN, cmd); + used += json_format_string(out_buf + used, + sizeof(out_buf) - used - 3, d->data.str); + used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}"); break; case RTE_TEL_DICT: prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":",
This patch supports escape special characters (including: \",\\,/,\b, /f,/n,/r,/t) when telemetry string. This patch is used to support telemetry xxx-dump commands which the string may include special characters. Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> --- lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 3 deletions(-)