[v5,2/2] eal: add additional info if core mask too long

Message ID 20210923110213.21350-2-david.hunt@intel.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v5,1/2] eal: add additional info if core list too long |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional fail Functional Testing issues
ci/iol-x86_64-compile-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed

Commit Message

Hunt, David Sept. 23, 2021, 11:02 a.m. UTC
  If the user requests to use an lcore above 128 using -c,
the eal will exit with "EAL: invalid coremask syntax" and
very little else useful information.

This patch adds some extra information suggesting to use --lcores
so that physical cores above RTE_MAX_LCORE (default 128) can be
used. This is achieved by using the --lcores option by mapping
the logical cores in the application to physical cores.

For example, if "-c 0x300000000000000000000000000000000" is
used, we see the following additional output on the command line:

EAL: lcore 128 >= RTE_MAX_LCORE (128)
EAL: lcore 129 >= RTE_MAX_LCORE (128)
EAL: to use high physical core ids , please use --lcores to
map them to lcore ids below RTE_MAX_LCORE,
EAL:     e.g. --lcores 0@128,1@129

Signed-off-by: David Hunt <david.hunt@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
changes in v3
   * added this patch to the set. Addresses the changes for
     the -c option.
changes in v4
   * fixed buffer overrun in populating lcore array.
   * switched from strlcpy to strdup due to a clang error.
changes in v5
   * replaced strdup and frees with a const char *, as we
     just need to keep track of original pointer location.
   * reverted err: usage to return -1, as no free() needed.
   * other minod code cleanups.
---
---
 lib/eal/common/eal_common_options.c | 47 ++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 7 deletions(-)
  

Comments

David Marchand Nov. 2, 2021, 5:45 p.m. UTC | #1
On Thu, Sep 23, 2021 at 1:03 PM David Hunt <david.hunt@intel.com> wrote:
>
> If the user requests to use an lcore above 128 using -c,
> the eal will exit with "EAL: invalid coremask syntax" and
> very little else useful information.
>
> This patch adds some extra information suggesting to use --lcores
> so that physical cores above RTE_MAX_LCORE (default 128) can be
> used. This is achieved by using the --lcores option by mapping
> the logical cores in the application to physical cores.
>
> For example, if "-c 0x300000000000000000000000000000000" is
> used, we see the following additional output on the command line:
>
> EAL: lcore 128 >= RTE_MAX_LCORE (128)
> EAL: lcore 129 >= RTE_MAX_LCORE (128)
> EAL: to use high physical core ids , please use --lcores to
> map them to lcore ids below RTE_MAX_LCORE,
> EAL:     e.g. --lcores 0@128,1@129
>
> Signed-off-by: David Hunt <david.hunt@intel.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> changes in v3
>    * added this patch to the set. Addresses the changes for
>      the -c option.
> changes in v4
>    * fixed buffer overrun in populating lcore array.
>    * switched from strlcpy to strdup due to a clang error.
> changes in v5
>    * replaced strdup and frees with a const char *, as we
>      just need to keep track of original pointer location.
>    * reverted err: usage to return -1, as no free() needed.
>    * other minod code cleanups.
> ---
> ---
>  lib/eal/common/eal_common_options.c | 47 ++++++++++++++++++++++++-----
>  1 file changed, 40 insertions(+), 7 deletions(-)
>
> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
> index 72735e0b09..7f715e4c15 100644
> --- a/lib/eal/common/eal_common_options.c
> +++ b/lib/eal/common/eal_common_options.c
> @@ -750,10 +750,12 @@ check_core_list(int *lcores, unsigned int count)
>  static int
>  eal_parse_coremask(const char *coremask, int *cores)
>  {
> -       unsigned count = 0;
> +       unsigned int count = 0;
>         int i, j, idx;
>         int val;
>         char c;
> +       int lcores[RTE_MAX_LCORE];
> +       const char *coremask_orig = coremask;
>
>         for (idx = 0; idx < RTE_MAX_LCORE; idx++)
>                 cores[idx] = -1;
> @@ -770,29 +772,60 @@ eal_parse_coremask(const char *coremask, int *cores)
>         i = strlen(coremask);
>         while ((i > 0) && isblank(coremask[i - 1]))
>                 i--;
> -       if (i == 0)
> +       if (i == 0) {
> +               RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
> +                               coremask_orig);
>                 return -1;
> +       }
>
> -       for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
> +       for (i = i - 1; i >= 0; i--) {

This loop exit condition changes here: this ensures that, once we
leave the loop, i == -1.
As a consequence... (see below)


>                 c = coremask[i];
>                 if (isxdigit(c) == 0) {
>                         /* invalid characters */
> +                       RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
> +                                       coremask_orig);
>                         return -1;
>                 }
>                 val = xdigit2val(c);
> -               for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
> +               for (j = 0; j < BITS_PER_HEX; j++, idx++)
>                 {
>                         if ((1 << j) & val) {
> -                               cores[idx] = count;
> +                               if (count >= RTE_MAX_LCORE) {
> +                                       RTE_LOG(ERR, EAL, "Too many lcores provided. Cannot exceed %d\n",
> +                                                       RTE_MAX_LCORE);
> +                                       return -1;
> +                               }
> +                               lcores[count] = idx;
>                                 count++;
>                         }
>                 }
>         }

... this loop below is dead code.

>         for (; i >= 0; i--)
> -               if (coremask[i] != '0')
> +               if (coremask[i] != '0') {
> +                       RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
> +                                       coremask_orig);

Nit: capital letter.

>                         return -1;
> -       if (count == 0)
> +               }
> +       if (count == 0) {
> +               RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
> +                               coremask_orig);
> +               return -1;
> +       }
> +
> +       if (check_core_list(lcores, count))
>                 return -1;
> +
> +       /*
> +        * Now that we've gto a list of cores no longer than

Same typo I commented on patch 1 for v4.



> +        * RTE_MAX_LCORE, and no lcore in that list is greater
> +        * than RTE_MAX_LCORE, populate the cores
> +        * array and return.
> +        */
> +       do {
> +               count--;
> +               cores[lcores[count]] = count;
> +       } while (count != 0);
> +
>         return 0;
>  }
>
> --
> 2.17.1
>
  
Hunt, David Nov. 3, 2021, 10:27 a.m. UTC | #2
Hi David,

On 2/11/2021 5:45 PM, David Marchand wrote:
> On Thu, Sep 23, 2021 at 1:03 PM David Hunt <david.hunt@intel.com> wrote:
>> If the user requests to use an lcore above 128 using -c,
>> the eal will exit with "EAL: invalid coremask syntax" and
>> very little else useful information.
>>
>> This patch adds some extra information suggesting to use --lcores
>> so that physical cores above RTE_MAX_LCORE (default 128) can be
>> used. This is achieved by using the --lcores option by mapping
>> the logical cores in the application to physical cores.
>>
>> For example, if "-c 0x300000000000000000000000000000000" is
>> used, we see the following additional output on the command line:
>>
>> EAL: lcore 128 >= RTE_MAX_LCORE (128)
>> EAL: lcore 129 >= RTE_MAX_LCORE (128)
>> EAL: to use high physical core ids , please use --lcores to
>> map them to lcore ids below RTE_MAX_LCORE,
>> EAL:     e.g. --lcores 0@128,1@129
>>
>> Signed-off-by: David Hunt <david.hunt@intel.com>
>> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
>> ---
>> changes in v3
>>     * added this patch to the set. Addresses the changes for
>>       the -c option.
>> changes in v4
>>     * fixed buffer overrun in populating lcore array.
>>     * switched from strlcpy to strdup due to a clang error.
>> changes in v5
>>     * replaced strdup and frees with a const char *, as we
>>       just need to keep track of original pointer location.
>>     * reverted err: usage to return -1, as no free() needed.
>>     * other minod code cleanups.
>> ---
>> ---
>>   lib/eal/common/eal_common_options.c | 47 ++++++++++++++++++++++++-----
>>   1 file changed, 40 insertions(+), 7 deletions(-)
>>
>> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
>> index 72735e0b09..7f715e4c15 100644
>> --- a/lib/eal/common/eal_common_options.c
>> +++ b/lib/eal/common/eal_common_options.c
>> @@ -750,10 +750,12 @@ check_core_list(int *lcores, unsigned int count)
>>   static int
>>   eal_parse_coremask(const char *coremask, int *cores)
>>   {
>> -       unsigned count = 0;
>> +       unsigned int count = 0;
>>          int i, j, idx;
>>          int val;
>>          char c;
>> +       int lcores[RTE_MAX_LCORE];
>> +       const char *coremask_orig = coremask;
>>
>>          for (idx = 0; idx < RTE_MAX_LCORE; idx++)
>>                  cores[idx] = -1;
>> @@ -770,29 +772,60 @@ eal_parse_coremask(const char *coremask, int *cores)
>>          i = strlen(coremask);
>>          while ((i > 0) && isblank(coremask[i - 1]))
>>                  i--;
>> -       if (i == 0)
>> +       if (i == 0) {
>> +               RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
>> +                               coremask_orig);
>>                  return -1;
>> +       }
>>
>> -       for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
>> +       for (i = i - 1; i >= 0; i--) {
> This loop exit condition changes here: this ensures that, once we
> leave the loop, i == -1.
> As a consequence... (see below)
>
>
>>                  c = coremask[i];
>>                  if (isxdigit(c) == 0) {
>>                          /* invalid characters */
>> +                       RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
>> +                                       coremask_orig);
>>                          return -1;
>>                  }
>>                  val = xdigit2val(c);
>> -               for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
>> +               for (j = 0; j < BITS_PER_HEX; j++, idx++)
>>                  {
>>                          if ((1 << j) & val) {
>> -                               cores[idx] = count;
>> +                               if (count >= RTE_MAX_LCORE) {
>> +                                       RTE_LOG(ERR, EAL, "Too many lcores provided. Cannot exceed %d\n",
>> +                                                       RTE_MAX_LCORE);
>> +                                       return -1;
>> +                               }
>> +                               lcores[count] = idx;
>>                                  count++;
>>                          }
>>                  }
>>          }
> ... this loop below is dead code.


Sure, no need to loop. I'll take out the loop, and just check for the 
first two characters to be '0x', as they're already trimmed.


>>          for (; i >= 0; i--)
>> -               if (coremask[i] != '0')
>> +               if (coremask[i] != '0') {
>> +                       RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
>> +                                       coremask_orig);
> Nit: capital letter.


Many other instances of 'invalid' in this file, will I change them all 
to "Invalid", or just this one?


>
>>                          return -1;
>> -       if (count == 0)
>> +               }
>> +       if (count == 0) {
>> +               RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
>> +                               coremask_orig);
>> +               return -1;
>> +       }
>> +
>> +       if (check_core_list(lcores, count))
>>                  return -1;
>> +
>> +       /*
>> +        * Now that we've gto a list of cores no longer than
> Same typo I commented on patch 1 for v4.

Will fix.


>
>
>> +        * RTE_MAX_LCORE, and no lcore in that list is greater
>> +        * than RTE_MAX_LCORE, populate the cores
>> +        * array and return.
>> +        */
>> +       do {
>> +               count--;
>> +               cores[lcores[count]] = count;
>> +       } while (count != 0);
>> +
>>          return 0;
>>   }
>>
>> --
>> 2.17.1
>>
  
David Marchand Nov. 3, 2021, 10:29 a.m. UTC | #3
On Wed, Nov 3, 2021 at 11:27 AM David Hunt <david.hunt@intel.com> wrote:
>
> >>          for (; i >= 0; i--)
> >> -               if (coremask[i] != '0')
> >> +               if (coremask[i] != '0') {
> >> +                       RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
> >> +                                       coremask_orig);
> > Nit: capital letter.
>
>
> Many other instances of 'invalid' in this file, will I change them all
> to "Invalid", or just this one?

Let's be consistent for additions of this patch.
Thanks.
  
Hunt, David Nov. 3, 2021, 1:30 p.m. UTC | #4
On 3/11/2021 10:27 AM, David Hunt wrote:
> Hi David,
>
> On 2/11/2021 5:45 PM, David Marchand wrote:
>> On Thu, Sep 23, 2021 at 1:03 PM David Hunt <david.hunt@intel.com> wrote:
>>> If the user requests to use an lcore above 128 using -c,
>>> the eal will exit with "EAL: invalid coremask syntax" and
>>> very little else useful information.
>>>
>>> This patch adds some extra information suggesting to use --lcores
>>> so that physical cores above RTE_MAX_LCORE (default 128) can be
>>> used. This is achieved by using the --lcores option by mapping
>>> the logical cores in the application to physical cores.
>>>
>>> For example, if "-c 0x300000000000000000000000000000000" is
>>> used, we see the following additional output on the command line:
>>>
>>> EAL: lcore 128 >= RTE_MAX_LCORE (128)
>>> EAL: lcore 129 >= RTE_MAX_LCORE (128)
>>> EAL: to use high physical core ids , please use --lcores to
>>> map them to lcore ids below RTE_MAX_LCORE,
>>> EAL:     e.g. --lcores 0@128,1@129
>>>
>>> Signed-off-by: David Hunt <david.hunt@intel.com>
>>> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
>>> ---
>>> changes in v3
>>>     * added this patch to the set. Addresses the changes for
>>>       the -c option.
>>> changes in v4
>>>     * fixed buffer overrun in populating lcore array.
>>>     * switched from strlcpy to strdup due to a clang error.
>>> changes in v5
>>>     * replaced strdup and frees with a const char *, as we
>>>       just need to keep track of original pointer location.
>>>     * reverted err: usage to return -1, as no free() needed.
>>>     * other minod code cleanups.
>>> ---
>>> ---
>>>   lib/eal/common/eal_common_options.c | 47 
>>> ++++++++++++++++++++++++-----
>>>   1 file changed, 40 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/lib/eal/common/eal_common_options.c 
>>> b/lib/eal/common/eal_common_options.c
>>> index 72735e0b09..7f715e4c15 100644
>>> --- a/lib/eal/common/eal_common_options.c
>>> +++ b/lib/eal/common/eal_common_options.c
>>> @@ -750,10 +750,12 @@ check_core_list(int *lcores, unsigned int count)
>>>   static int
>>>   eal_parse_coremask(const char *coremask, int *cores)
>>>   {
>>> -       unsigned count = 0;
>>> +       unsigned int count = 0;
>>>          int i, j, idx;
>>>          int val;
>>>          char c;
>>> +       int lcores[RTE_MAX_LCORE];
>>> +       const char *coremask_orig = coremask;
>>>
>>>          for (idx = 0; idx < RTE_MAX_LCORE; idx++)
>>>                  cores[idx] = -1;
>>> @@ -770,29 +772,60 @@ eal_parse_coremask(const char *coremask, int 
>>> *cores)
>>>          i = strlen(coremask);
>>>          while ((i > 0) && isblank(coremask[i - 1]))
>>>                  i--;
>>> -       if (i == 0)
>>> +       if (i == 0) {
>>> +               RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
>>> +                               coremask_orig);
>>>                  return -1;
>>> +       }
>>>
>>> -       for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
>>> +       for (i = i - 1; i >= 0; i--) {
>> This loop exit condition changes here: this ensures that, once we
>> leave the loop, i == -1.
>> As a consequence... (see below)
>>
>>
>>>                  c = coremask[i];
>>>                  if (isxdigit(c) == 0) {
>>>                          /* invalid characters */
>>> +                       RTE_LOG(ERR, EAL, "invalid characters in 
>>> coremask: [%s]\n",
>>> +                                       coremask_orig);
>>>                          return -1;
>>>                  }
>>>                  val = xdigit2val(c);
>>> -               for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; 
>>> j++, idx++)
>>> +               for (j = 0; j < BITS_PER_HEX; j++, idx++)
>>>                  {
>>>                          if ((1 << j) & val) {
>>> -                               cores[idx] = count;
>>> +                               if (count >= RTE_MAX_LCORE) {
>>> +                                       RTE_LOG(ERR, EAL, "Too many 
>>> lcores provided. Cannot exceed %d\n",
>>> + RTE_MAX_LCORE);
>>> +                                       return -1;
>>> +                               }
>>> +                               lcores[count] = idx;
>>>                                  count++;
>>>                          }
>>>                  }
>>>          }
>> ... this loop below is dead code.
>
>
> Sure, no need to loop. I'll take out the loop, and just check for the 
> first two characters to be '0x', as they're already trimmed.
>
>

On second thoughts, looking at this closer, the any '0x' or '0X' at the 
start is skipped earlier in the code, so all we're checking for here is 
a leading zero to the hex, which does not seem valid, as that would mean 
that 0x0ff is valid, but 0xff is not.

Take the following 2 cases:

-c f
EAL: Invalid start [0] to coremask: [f]

and even worse:

-c 0xf
EAL: Invalid start [0] to coremask: [0xf]

So I think it makes sense to remove that particular check altogether, in 
which case both "-c f" and "-c 0xf" will work as expected.

I will make this change in next version.

--snip--
  

Patch

diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index 72735e0b09..7f715e4c15 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -750,10 +750,12 @@  check_core_list(int *lcores, unsigned int count)
 static int
 eal_parse_coremask(const char *coremask, int *cores)
 {
-	unsigned count = 0;
+	unsigned int count = 0;
 	int i, j, idx;
 	int val;
 	char c;
+	int lcores[RTE_MAX_LCORE];
+	const char *coremask_orig = coremask;
 
 	for (idx = 0; idx < RTE_MAX_LCORE; idx++)
 		cores[idx] = -1;
@@ -770,29 +772,60 @@  eal_parse_coremask(const char *coremask, int *cores)
 	i = strlen(coremask);
 	while ((i > 0) && isblank(coremask[i - 1]))
 		i--;
-	if (i == 0)
+	if (i == 0) {
+		RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
+				coremask_orig);
 		return -1;
+	}
 
-	for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+	for (i = i - 1; i >= 0; i--) {
 		c = coremask[i];
 		if (isxdigit(c) == 0) {
 			/* invalid characters */
+			RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
+					coremask_orig);
 			return -1;
 		}
 		val = xdigit2val(c);
-		for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
+		for (j = 0; j < BITS_PER_HEX; j++, idx++)
 		{
 			if ((1 << j) & val) {
-				cores[idx] = count;
+				if (count >= RTE_MAX_LCORE) {
+					RTE_LOG(ERR, EAL, "Too many lcores provided. Cannot exceed %d\n",
+							RTE_MAX_LCORE);
+					return -1;
+				}
+				lcores[count] = idx;
 				count++;
 			}
 		}
 	}
 	for (; i >= 0; i--)
-		if (coremask[i] != '0')
+		if (coremask[i] != '0') {
+			RTE_LOG(ERR, EAL, "invalid characters in coremask: [%s]\n",
+					coremask_orig);
 			return -1;
-	if (count == 0)
+		}
+	if (count == 0) {
+		RTE_LOG(ERR, EAL, "No lcores in coremask: [%s]\n",
+				coremask_orig);
+		return -1;
+	}
+
+	if (check_core_list(lcores, count))
 		return -1;
+
+	/*
+	 * Now that we've gto a list of cores no longer than
+	 * RTE_MAX_LCORE, and no lcore in that list is greater
+	 * than RTE_MAX_LCORE, populate the cores
+	 * array and return.
+	 */
+	do {
+		count--;
+		cores[lcores[count]] = count;
+	} while (count != 0);
+
 	return 0;
 }