[v2,3/4] test: change memory barrier variables to uint64_t

Message ID 1557170634-99830-1-git-send-email-drc@linux.vnet.ibm.com (mailing list archive)
State Superseded, archived
Headers
Series [v2,1/4] test: fix typo in print statement |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

David Christensen May 6, 2019, 7:23 p.m. UTC
  Memory barrier failures can be intermittent. Increase the size of the
sum/val/iteration variables to allow tests that can run for days so that
sporadic errors can be identified.

Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
---
v2:
* Removed change to ITER_MAX

 app/test/test_barrier.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)
  

Comments

Ananyev, Konstantin May 7, 2019, 10:39 a.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of David Christensen
> Sent: Monday, May 6, 2019 8:24 PM
> To: dev@dpdk.org
> Cc: David Christensen <drc@linux.vnet.ibm.com>
> Subject: [dpdk-dev] [PATCH v2 3/4] test: change memory barrier variables to uint64_t
> 
> Memory barrier failures can be intermittent. Increase the size of the
> sum/val/iteration variables to allow tests that can run for days so that
> sporadic errors can be identified.
> 
> Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
> ---
> v2:
> * Removed change to ITER_MAX
> 
>  app/test/test_barrier.c | 23 ++++++++++++-----------
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test/test_barrier.c b/app/test/test_barrier.c
> index ae37b1e..a022708 100644
> --- a/app/test/test_barrier.c
> +++ b/app/test/test_barrier.c
> @@ -55,8 +55,8 @@ struct plock {
>   */
>  struct plock_test {
>  	struct plock lock;
> -	uint32_t val;
> -	uint32_t iter;
> +	uint64_t val;
> +	uint64_t iter;
>  };
> 
>  /*
> @@ -65,8 +65,8 @@ struct plock_test {
>   */
>  struct lcore_plock_test {
>  	struct plock_test *pt[2]; /* shared, lock-protected data */
> -	uint32_t sum[2];          /* local copy of the shared data */
> -	uint32_t iter;            /* number of iterations to perfom */
> +	uint64_t sum[2];          /* local copy of the shared data */
> +	uint64_t iter;            /* number of iterations to perfom */
>  	uint32_t lc;              /* given lcore id */
>  };

Not sure why you think this is needed - right now
both iter and sum wouldn't be bigger than 32bit
(max value that sum can reach: 2^27).

> 
> @@ -130,7 +130,8 @@ struct lcore_plock_test {
>  plock_test1_lcore(void *data)
>  {
>  	uint64_t tm;
> -	uint32_t i, lc, ln, n;
> +	uint32_t lc, ln;
> +	uint64_t i, n;
>  	struct lcore_plock_test *lpt;
> 
>  	lpt = data;
> @@ -166,9 +167,9 @@ struct lcore_plock_test {
> 
>  	tm = rte_get_timer_cycles() - tm;
> 
> -	printf("%s(%u): %u iterations finished, in %" PRIu64
> +	printf("%s(%u): %lu iterations finished, in %" PRIu64
>  		" cycles, %#Lf cycles/iteration, "
> -		"local sum={%u, %u}\n",
> +		"local sum={%lu, %lu}\n",
>  		__func__, lc, i, tm, (long double)tm / i,
>  		lpt->sum[0], lpt->sum[1]);
>  	return 0;
> @@ -184,11 +185,11 @@ struct lcore_plock_test {
>   * and local data are the same.
>   */
>  static int
> -plock_test(uint32_t iter, enum plock_use_type utype)
> +plock_test(uint64_t iter, enum plock_use_type utype)
>  {
>  	int32_t rc;
>  	uint32_t i, lc, n;
> -	uint32_t *sum;
> +	uint64_t *sum;
>  	struct plock_test *pt;
>  	struct lcore_plock_test *lpt;
> 
> @@ -199,7 +200,7 @@ struct lcore_plock_test {
>  	lpt = calloc(n, sizeof(*lpt));
>  	sum = calloc(n + 1, sizeof(*sum));
> 
> -	printf("%s(iter=%u, utype=%u) started on %u lcores\n",
> +	printf("%s(iter=%lu, utype=%u) started on %u lcores\n",
>  		__func__, iter, utype, n);
> 
>  	if (pt == NULL || lpt == NULL || sum == NULL) {
> @@ -247,7 +248,7 @@ struct lcore_plock_test {
> 
>  	rc = 0;
>  	for (i = 0; i != n; i++) {
> -		printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
> +		printf("%s: sum[%u]=%lu, pt[%u].val=%lu, pt[%u].iter=%lu;\n",

Here and in other places, you need to use PRIu64 for 64 bit values.

>  			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);
> 
>  		/* race condition occurred, lock doesn't work properly */
> --
> 1.8.3.1
  
David Christensen May 7, 2019, 5:37 p.m. UTC | #2
>> @@ -65,8 +65,8 @@ struct plock_test {
>>    */
>>   struct lcore_plock_test {
>>   	struct plock_test *pt[2]; /* shared, lock-protected data */
>> -	uint32_t sum[2];          /* local copy of the shared data */
>> -	uint32_t iter;            /* number of iterations to perfom */
>> +	uint64_t sum[2];          /* local copy of the shared data */
>> +	uint64_t iter;            /* number of iterations to perfom */
>>   	uint32_t lc;              /* given lcore id */
>>   };
> 
> Not sure why you think this is needed - right now
> both iter and sum wouldn't be bigger than 32bit
> (max value that sum can reach: 2^27).
> 

I view test_barrier and other tools in the test directory as functional 
test tools for developers.  My understanding is that they are not 
typically run as part of DTS or any other validation process (please let 
me know if that is incorrect).  As a result, a developer that is testing 
this functionality might have a valid reason to alter the value of 
ITER_MAX for a specific functional test.

While validating the changes in patch 4 of the series I needed to run 
more than 2^27 iterations.  I encountered situations where some runs of 
my test code would fail and other runs would pass when using the default 
ITER_MAX value.  As a result, I needed to extend the number of 
iterations tested to gain confidence in the final fix for Power systems. 
At the end, I was running 64 billion iterations (ITER_MAX = 
0xF_0000_0000) across 64 Power 9 lcores which takes ~16 hours.

I felt the patch to extend these values to 64 bit might benefit other 
developers in the future. And since the cost is low (this is not EAL 
library code pulled into every user application) there's no harm in 
making the change.

>> -		printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
>> +		printf("%s: sum[%u]=%lu, pt[%u].val=%lu, pt[%u].iter=%lu;\n",
> 
> Here and in other places, you need to use PRIu64 for 64 bit values.

Ok. I'll resubmit if there are no objections to the rationale behind the 
change.

Dave
  
Ananyev, Konstantin May 7, 2019, 11:15 p.m. UTC | #3
> -----Original Message-----
> From: David Christensen [mailto:drc@linux.vnet.ibm.com]
> Sent: Tuesday, May 7, 2019 6:38 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] test: change memory barrier variables to uint64_t
> 
> >> @@ -65,8 +65,8 @@ struct plock_test {
> >>    */
> >>   struct lcore_plock_test {
> >>   	struct plock_test *pt[2]; /* shared, lock-protected data */
> >> -	uint32_t sum[2];          /* local copy of the shared data */
> >> -	uint32_t iter;            /* number of iterations to perfom */
> >> +	uint64_t sum[2];          /* local copy of the shared data */
> >> +	uint64_t iter;            /* number of iterations to perfom */
> >>   	uint32_t lc;              /* given lcore id */
> >>   };
> >
> > Not sure why you think this is needed - right now
> > both iter and sum wouldn't be bigger than 32bit
> > (max value that sum can reach: 2^27).
> >
> 
> I view test_barrier and other tools in the test directory as functional
> test tools for developers.  My understanding is that they are not
> typically run as part of DTS or any other validation process (please let
> me know if that is incorrect).  As a result, a developer that is testing
> this functionality might have a valid reason to alter the value of
> ITER_MAX for a specific functional test.
> 
> While validating the changes in patch 4 of the series I needed to run
> more than 2^27 iterations.  I encountered situations where some runs of
> my test code would fail and other runs would pass when using the default
> ITER_MAX value.  As a result, I needed to extend the number of
> iterations tested to gain confidence in the final fix for Power systems.
> At the end, I was running 64 billion iterations (ITER_MAX =
> 0xF_0000_0000) across 64 Power 9 lcores which takes ~16 hours.
> 
> I felt the patch to extend these values to 64 bit might benefit other
> developers in the future. And since the cost is low (this is not EAL
> library code pulled into every user application) there's no harm in
> making the change.

If you feel that it might be useful, then it is ok by me.
Konstantin

> 
> >> -		printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
> >> +		printf("%s: sum[%u]=%lu, pt[%u].val=%lu, pt[%u].iter=%lu;\n",
> >
> > Here and in other places, you need to use PRIu64 for 64 bit values.
> 
> Ok. I'll resubmit if there are no objections to the rationale behind the
> change.
> 
> Dave
  

Patch

diff --git a/app/test/test_barrier.c b/app/test/test_barrier.c
index ae37b1e..a022708 100644
--- a/app/test/test_barrier.c
+++ b/app/test/test_barrier.c
@@ -55,8 +55,8 @@  struct plock {
  */
 struct plock_test {
 	struct plock lock;
-	uint32_t val;
-	uint32_t iter;
+	uint64_t val;
+	uint64_t iter;
 };
 
 /*
@@ -65,8 +65,8 @@  struct plock_test {
  */
 struct lcore_plock_test {
 	struct plock_test *pt[2]; /* shared, lock-protected data */
-	uint32_t sum[2];          /* local copy of the shared data */
-	uint32_t iter;            /* number of iterations to perfom */
+	uint64_t sum[2];          /* local copy of the shared data */
+	uint64_t iter;            /* number of iterations to perfom */
 	uint32_t lc;              /* given lcore id */
 };
 
@@ -130,7 +130,8 @@  struct lcore_plock_test {
 plock_test1_lcore(void *data)
 {
 	uint64_t tm;
-	uint32_t i, lc, ln, n;
+	uint32_t lc, ln;
+	uint64_t i, n;
 	struct lcore_plock_test *lpt;
 
 	lpt = data;
@@ -166,9 +167,9 @@  struct lcore_plock_test {
 
 	tm = rte_get_timer_cycles() - tm;
 
-	printf("%s(%u): %u iterations finished, in %" PRIu64
+	printf("%s(%u): %lu iterations finished, in %" PRIu64
 		" cycles, %#Lf cycles/iteration, "
-		"local sum={%u, %u}\n",
+		"local sum={%lu, %lu}\n",
 		__func__, lc, i, tm, (long double)tm / i,
 		lpt->sum[0], lpt->sum[1]);
 	return 0;
@@ -184,11 +185,11 @@  struct lcore_plock_test {
  * and local data are the same.
  */
 static int
-plock_test(uint32_t iter, enum plock_use_type utype)
+plock_test(uint64_t iter, enum plock_use_type utype)
 {
 	int32_t rc;
 	uint32_t i, lc, n;
-	uint32_t *sum;
+	uint64_t *sum;
 	struct plock_test *pt;
 	struct lcore_plock_test *lpt;
 
@@ -199,7 +200,7 @@  struct lcore_plock_test {
 	lpt = calloc(n, sizeof(*lpt));
 	sum = calloc(n + 1, sizeof(*sum));
 
-	printf("%s(iter=%u, utype=%u) started on %u lcores\n",
+	printf("%s(iter=%lu, utype=%u) started on %u lcores\n",
 		__func__, iter, utype, n);
 
 	if (pt == NULL || lpt == NULL || sum == NULL) {
@@ -247,7 +248,7 @@  struct lcore_plock_test {
 
 	rc = 0;
 	for (i = 0; i != n; i++) {
-		printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
+		printf("%s: sum[%u]=%lu, pt[%u].val=%lu, pt[%u].iter=%lu;\n",
 			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);
 
 		/* race condition occurred, lock doesn't work properly */