common/cnxk: use cas with release semantics for batch alloc

Message ID 20211130054527.2696881-1-asekhar@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series common/cnxk: use cas with release semantics for batch alloc |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/github-robot: build success github build: passed
ci/intel-Testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS

Commit Message

Ashwin Sekhar T K Nov. 30, 2021, 5:45 a.m. UTC
Before issuing the batch alloc, we clear the first word of the
cache lines so that NPA can update the status. Make sure that
this clearing is flushed before the batch alloc is issued, by
issuing the batch alloc with a CAS that has release semantics (CASL).

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
 drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
 drivers/common/cnxk/roc_npa.h        |  2 +-
 3 files changed, 22 insertions(+), 1 deletion(-)
  

Comments

Jerin Jacob Jan. 6, 2022, 12:27 p.m. UTC | #1
On Tue, Nov 30, 2021 at 11:17 AM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Before issuing the batch alloc, we clear the first word of
> cache lines so that NPA can update the status. Make sure that
> this line clear is flushed before the batch alloc is issued.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>

Changed cas -> CAS in git commit

Acked-by: Jerin Jacob <jerinj@marvell.com>
Applied to dpdk-next-net-mrvl/for-next-net. Thanks


> ---
>  drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
>  drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
>  drivers/common/cnxk/roc_npa.h        |  2 +-
>  3 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
> index fe5f7f46d0..4f15503c29 100644
> --- a/drivers/common/cnxk/roc_io.h
> +++ b/drivers/common/cnxk/roc_io.h
> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
>         return compare;
>  }
>
> +static __plt_always_inline uint64_t
> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
> +{
> +       asm volatile(PLT_CPU_FEATURE_PREAMBLE
> +                    "casl %[compare], %[swap], [%[ptr]]\n"
> +                    : [compare] "+r"(compare)
> +                    : [swap] "r"(swap), [ptr] "r"(ptr)
> +                    : "memory");
> +
> +       return compare;
> +}
> +
>  static __plt_always_inline uint64_t
>  roc_atomic64_add_nosync(int64_t incr, int64_t *ptr)
>  {
> diff --git a/drivers/common/cnxk/roc_io_generic.h b/drivers/common/cnxk/roc_io_generic.h
> index ceaa3a38d8..5f90835c09 100644
> --- a/drivers/common/cnxk/roc_io_generic.h
> +++ b/drivers/common/cnxk/roc_io_generic.h
> @@ -41,6 +41,15 @@ roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
>         return compare;
>  }
>
> +static __plt_always_inline uint64_t
> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
> +{
> +       PLT_SET_USED(swap);
> +       PLT_SET_USED(ptr);
> +
> +       return compare;
> +}
> +
>  static inline uint64_t
>  roc_atomic64_add_nosync(int64_t incr, int64_t *ptr)
>  {
> diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
> index 46350fdb48..19b9a9352c 100644
> --- a/drivers/common/cnxk/roc_npa.h
> +++ b/drivers/common/cnxk/roc_npa.h
> @@ -218,7 +218,7 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
>         cmp.compare_s.dis_wait = dis_wait;
>         cmp.compare_s.count = num;
>
> -       res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
> +       res = roc_atomic64_casl(cmp.u, (uint64_t)buf, addr);
>         if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
>                 return -1;
>
> --
> 2.32.0
>
  
Ferruh Yigit Jan. 11, 2022, 12:08 p.m. UTC | #2
On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
> Before issuing the batch alloc, we clear the first word of
> cache lines so that NPA can update the status. Make sure that
> this line clear is flushed before the batch alloc is issued.
> 
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> ---
>   drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
>   drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
>   drivers/common/cnxk/roc_npa.h        |  2 +-
>   3 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
> index fe5f7f46d0..4f15503c29 100644
> --- a/drivers/common/cnxk/roc_io.h
> +++ b/drivers/common/cnxk/roc_io.h
> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
>   	return compare;
>   }
>   
> +static __plt_always_inline uint64_t
> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
> +{
> +	asm volatile(PLT_CPU_FEATURE_PREAMBLE
> +		     "casl %[compare], %[swap], [%[ptr]]\n"
> +		     : [compare] "+r"(compare)
> +		     : [swap] "r"(swap), [ptr] "r"(ptr)
> +		     : "memory");
> +

out of curiosity, what is the "cas with release semantics"?
briefly, what is the difference between 'cas' and 'casl'?
  
Ferruh Yigit Jan. 11, 2022, 12:12 p.m. UTC | #3
On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
> On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
>> Before issuing the batch alloc, we clear the first word of
>> cache lines so that NPA can update the status. Make sure that
>> this line clear is flushed before the batch alloc is issued.
>>
>> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
>> ---
>>   drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
>>   drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
>>   drivers/common/cnxk/roc_npa.h        |  2 +-
>>   3 files changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
>> index fe5f7f46d0..4f15503c29 100644
>> --- a/drivers/common/cnxk/roc_io.h
>> +++ b/drivers/common/cnxk/roc_io.h
>> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
>>       return compare;
>>   }
>> +static __plt_always_inline uint64_t
>> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
>> +{
>> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
>> +             "casl %[compare], %[swap], [%[ptr]]\n"
>> +             : [compare] "+r"(compare)
>> +             : [swap] "r"(swap), [ptr] "r"(ptr)
>> +             : "memory");
>> +
> 
> out of curiosity, what is the "cas with release semantics"?
> briefly, what is the difference between 'cas' and 'casl'?

+ Honnappa & Ruifeng,

Isn't this API Arm-wide, rather than cnxk-specific?
Would it make sense to provide this API at the Arm level and have cnxk use it from there?
  
Ashwin Sekhar T K Jan. 11, 2022, 12:26 p.m. UTC | #4
CAS is compare-and-swap. CASL is compare-and-swap with release semantics.

But on the CNXK platform, the CAS* instructions behave completely differently when they are issued to specific addresses. These APIs are meant for such special cases, so they cannot be made ARM generic.

Ashwin Sekhar T K 

> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Tuesday, January 11, 2022 5:42 PM
> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>
> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Jerin Jacob
> Kollanukkaran <jerinj@marvell.com>; Sunil Kumar Kori
> <skori@marvell.com>; Satha Koteswara Rao Kottidi
> <skoteshwar@marvell.com>; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
> Subject: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics for
> batch alloc
> 
> External Email
> 
> ----------------------------------------------------------------------
> On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
> > On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
> >> Before issuing the batch alloc, we clear the first word of cache
> >> lines so that NPA can update the status. Make sure that this line
> >> clear is flushed before the batch alloc is issued.
> >>
> >> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> >> ---
> >>   drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
> >>   drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
> >>   drivers/common/cnxk/roc_npa.h        |  2 +-
> >>   3 files changed, 22 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/common/cnxk/roc_io.h
> >> b/drivers/common/cnxk/roc_io.h index fe5f7f46d0..4f15503c29 100644
> >> --- a/drivers/common/cnxk/roc_io.h
> >> +++ b/drivers/common/cnxk/roc_io.h
> >> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t
> swap,
> >> int64_t *ptr)
> >>       return compare;
> >>   }
> >> +static __plt_always_inline uint64_t
> >> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr) {
> >> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
> >> +             "casl %[compare], %[swap], [%[ptr]]\n"
> >> +             : [compare] "+r"(compare)
> >> +             : [swap] "r"(swap), [ptr] "r"(ptr)
> >> +             : "memory");
> >> +
> >
> > out of curiosity, what is the "cas with release semantics"?
> > briefly, what is the difference between 'cas' and 'casl'?
> 
> + Honnappa & Ruifeng,
> 
> Isn't this API Arm wide, instead of being cnxk specific?
> Does it make sense to make this API for arm and cnxk use from there?
  
Ferruh Yigit Jan. 11, 2022, 1:46 p.m. UTC | #5
On 1/11/2022 12:26 PM, Ashwin Sekhar Thalakalath Kottilveetil wrote:
> CAS is compare and swap. CASL is compare and swap with release semantics.
> 

What does 'release semantics' mean? What is the functional difference between the two?

> But on CNXK platform, the functionality of CAS* instructions is completely different when it is done to specific addresses. These APIs are meant for use for such special cases. These cannot be made ARM generic.
> 
> Ashwin Sekhar T K
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>> Sent: Tuesday, January 11, 2022 5:42 PM
>> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
>> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
>> Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>
>> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Jerin Jacob
>> Kollanukkaran <jerinj@marvell.com>; Sunil Kumar Kori
>> <skori@marvell.com>; Satha Koteswara Rao Kottidi
>> <skoteshwar@marvell.com>; Pavan Nikhilesh Bhagavatula
>> <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
>> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
>> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
>> Subject: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics for
>> batch alloc
>>
>> External Email
>>
>> ----------------------------------------------------------------------
>> On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
>>> On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
>>>> Before issuing the batch alloc, we clear the first word of cache
>>>> lines so that NPA can update the status. Make sure that this line
>>>> clear is flushed before the batch alloc is issued.
>>>>
>>>> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
>>>> ---
>>>>    drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
>>>>    drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
>>>>    drivers/common/cnxk/roc_npa.h        |  2 +-
>>>>    3 files changed, 22 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/common/cnxk/roc_io.h
>>>> b/drivers/common/cnxk/roc_io.h index fe5f7f46d0..4f15503c29 100644
>>>> --- a/drivers/common/cnxk/roc_io.h
>>>> +++ b/drivers/common/cnxk/roc_io.h
>>>> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t
>> swap,
>>>> int64_t *ptr)
>>>>        return compare;
>>>>    }
>>>> +static __plt_always_inline uint64_t
>>>> +roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr) {
>>>> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
>>>> +             "casl %[compare], %[swap], [%[ptr]]\n"
>>>> +             : [compare] "+r"(compare)
>>>> +             : [swap] "r"(swap), [ptr] "r"(ptr)
>>>> +             : "memory");
>>>> +
>>>
>>> out of curiosity, what is the "cas with release semantics"?
>>> briefly, what is the difference between 'cas' and 'casl'?
>>
>> + Honnappa & Ruifeng,
>>
>> Isn't this API Arm wide, instead of being cnxk specific?
>> Does it make sense to make this API for arm and cnxk use from there?
  
Ruifeng Wang Jan. 12, 2022, 3:01 a.m. UTC | #6
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@intel.com>
> Sent: Tuesday, January 11, 2022 9:46 PM
> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
> jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
> Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
> Subject: Re: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics
> for batch alloc
> 
> On 1/11/2022 12:26 PM, Ashwin Sekhar Thalakalath Kottilveetil wrote:
> > CAS is compare and swap. CASL is compare and swap with release
> semantics.
> >
> 
> What does 'release semantics' mean? What is functional difference in both?

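'Release semantics' is a memory-ordering guarantee on a store operation: memory accesses that precede the store in program order become visible before the store itself.
Here, it ensures store-store ordering.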

And some comments below.
> 
> > But on CNXK platform, the functionality of CAS* instructions is completely
> different when it is done to specific addresses. These APIs are meant for use
> for such special cases. These cannot be made ARM generic.
> >
> > Ashwin Sekhar T K
> >
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@intel.com>
> >> Sent: Tuesday, January 11, 2022 5:42 PM
> >> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
> >> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> >> Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>
> >> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Jerin Jacob
> >> Kollanukkaran <jerinj@marvell.com>; Sunil Kumar Kori
> >> <skori@marvell.com>; Satha Koteswara Rao Kottidi
> >> <skoteshwar@marvell.com>; Pavan Nikhilesh Bhagavatula
> >> <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> >> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
> >> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
> >> Subject: [EXT] Re: [PATCH] common/cnxk: use cas with release
> >> semantics for batch alloc
> >>
> >> External Email
> >>
> >> ---------------------------------------------------------------------
> >> - On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
> >>> On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
> >>>> Before issuing the batch alloc, we clear the first word of cache
> >>>> lines so that NPA can update the status. Make sure that this line
> >>>> clear is flushed before the batch alloc is issued.
> >>>>
> >>>> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> >>>> ---
> >>>>    drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
> >>>>    drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
> >>>>    drivers/common/cnxk/roc_npa.h        |  2 +-
> >>>>    3 files changed, 22 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/common/cnxk/roc_io.h
> >>>> b/drivers/common/cnxk/roc_io.h index fe5f7f46d0..4f15503c29 100644
> >>>> --- a/drivers/common/cnxk/roc_io.h
> >>>> +++ b/drivers/common/cnxk/roc_io.h
> >>>> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t
> >> swap,
> >>>> int64_t *ptr)
> >>>>        return compare;
> >>>>    }
> >>>> +static __plt_always_inline uint64_t roc_atomic64_casl(uint64_t
> >>>> +compare, uint64_t swap, int64_t *ptr) {
> >>>> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
> >>>> +             "casl %[compare], %[swap], [%[ptr]]\n"
> >>>> +             : [compare] "+r"(compare)
> >>>> +             : [swap] "r"(swap), [ptr] "r"(ptr)
> >>>> +             : "memory");
> >>>> +
> >>>
> >>> out of curiosity, what is the "cas with release semantics"?
> >>> briefly, what is the difference between 'cas' and 'casl'?
> >>
> >> + Honnappa & Ruifeng,

Thanks Ferruh for adding me in this loop.
> >>
> >> Isn't this API Arm wide, instead of being cnxk specific?
> >> Does it make sense to make this API for arm and cnxk use from there?

Yes, the CAS operation can be used Arm-wide.
Generally, CAS is available via the __atomic_compare_exchange/_n() compiler built-ins. This is how we use
atomics in DPDK, so there is no need to add another generic API.
  
Ruifeng Wang Jan. 12, 2022, 6:18 a.m. UTC | #7
> -----Original Message-----
> From: Ruifeng Wang <Ruifeng.Wang@arm.com>
> Sent: Wednesday, January 12, 2022 11:01 AM
> To: Ferruh Yigit <ferruh.yigit@intel.com>; Ashwin Sekhar Thalakalath
> Kottilveetil <asekhar@marvell.com>; dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>
> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
> jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
> Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>;
> nd <nd@arm.com>
> Subject: RE: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics
> for batch alloc
> 
> > -----Original Message-----
> > From: Ferruh Yigit <ferruh.yigit@intel.com>
> > Sent: Tuesday, January 11, 2022 9:46 PM
> > To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
> > dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> > Ruifeng Wang <Ruifeng.Wang@arm.com>
> > Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
> > jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
> > Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
> > Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> > <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
> > Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
> > Subject: Re: [EXT] Re: [PATCH] common/cnxk: use cas with release
> > semantics for batch alloc
> >
> > On 1/11/2022 12:26 PM, Ashwin Sekhar Thalakalath Kottilveetil wrote:
> > > CAS is compare and swap. CASL is compare and swap with release
> > semantics.
> > >
> >
> > What does 'release semantics' mean? What is functional difference in both?
> 
> 'release semantics' is semantics in memory ordering for store operations.
> It ensures store-store ordering.
> 
> And some comments below.
> >
> > > But on CNXK platform, the functionality of CAS* instructions is
> > > completely
> > different when it is done to specific addresses. These APIs are meant
> > for use for such special cases. These cannot be made ARM generic.
> > >
> > > Ashwin Sekhar T K
> > >
> > >> -----Original Message-----
> > >> From: Ferruh Yigit <ferruh.yigit@intel.com>
> > >> Sent: Tuesday, January 11, 2022 5:42 PM
> > >> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
> > >> dev@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>;
> > >> Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>
> > >> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Jerin Jacob
> > >> Kollanukkaran <jerinj@marvell.com>; Sunil Kumar Kori
> > >> <skori@marvell.com>; Satha Koteswara Rao Kottidi
> > >> <skoteshwar@marvell.com>; Pavan Nikhilesh Bhagavatula
> > >> <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
> > >> <kirankumark@marvell.com>; Satheesh Paul
> <psatheesh@marvell.com>;
> > >> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal
> <gakhil@marvell.com>
> > >> Subject: [EXT] Re: [PATCH] common/cnxk: use cas with release
> > >> semantics for batch alloc
> > >>
> > >> External Email
> > >>
> > >> -------------------------------------------------------------------
> > >> --
> > >> - On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
> > >>> On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
> > >>>> Before issuing the batch alloc, we clear the first word of cache
> > >>>> lines so that NPA can update the status. Make sure that this line
> > >>>> clear is flushed before the batch alloc is issued.
> > >>>>
> > >>>> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> > >>>> ---
> > >>>>    drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
> > >>>>    drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
> > >>>>    drivers/common/cnxk/roc_npa.h        |  2 +-
> > >>>>    3 files changed, 22 insertions(+), 1 deletion(-)
> > >>>>
> > >>>> diff --git a/drivers/common/cnxk/roc_io.h
> > >>>> b/drivers/common/cnxk/roc_io.h index fe5f7f46d0..4f15503c29
> > >>>> 100644
> > >>>> --- a/drivers/common/cnxk/roc_io.h
> > >>>> +++ b/drivers/common/cnxk/roc_io.h
> > >>>> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t
> > >> swap,
> > >>>> int64_t *ptr)
> > >>>>        return compare;
> > >>>>    }
> > >>>> +static __plt_always_inline uint64_t roc_atomic64_casl(uint64_t
> > >>>> +compare, uint64_t swap, int64_t *ptr) {
> > >>>> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
> > >>>> +             "casl %[compare], %[swap], [%[ptr]]\n"
> > >>>> +             : [compare] "+r"(compare)
> > >>>> +             : [swap] "r"(swap), [ptr] "r"(ptr)
> > >>>> +             : "memory");
> > >>>> +
> > >>>
> > >>> out of curiosity, what is the "cas with release semantics"?
> > >>> briefly, what is the difference between 'cas' and 'casl'?
> > >>
> > >> + Honnappa & Ruifeng,
> 
> Thanks Ferruh for adding me in this loop.
> > >>
> > >> Isn't this API Arm wide, instead of being cnxk specific?
> > >> Does it make sense to make this API for arm and cnxk use from there?
> 
> Yes, CAS operation can be used Arm wide.
> Generally, CAS is available via __atomic_compare_exchange/_n() compiler
> built-ins. This is the way we use atomic in DPDK. So there is no need to add
> another generic API.

Just to make my comment clearer:
For generic CAS operations, compiler built-ins can be used; no additional API is needed.
Given the special usage of the instructions in CNXK, the inline assembly here is not intended to be a wrapper around a
generic CAS operation but rather an interface to another hardware function. It doesn't make sense to make it Arm-wide.

Thanks.
  
Ferruh Yigit Jan. 12, 2022, 9:20 a.m. UTC | #8
On 1/12/2022 6:18 AM, Ruifeng Wang wrote:
>> -----Original Message-----
>> From: Ruifeng Wang <Ruifeng.Wang@arm.com>
>> Sent: Wednesday, January 12, 2022 11:01 AM
>> To: Ferruh Yigit <ferruh.yigit@intel.com>; Ashwin Sekhar Thalakalath
>> Kottilveetil <asekhar@marvell.com>; dev@dpdk.org; Honnappa Nagarahalli
>> <Honnappa.Nagarahalli@arm.com>
>> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
>> jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
>> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
>> Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
>> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
>> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>;
>> nd <nd@arm.com>
>> Subject: RE: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics
>> for batch alloc
>>
>>> -----Original Message-----
>>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>>> Sent: Tuesday, January 11, 2022 9:46 PM
>>> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
>>> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
>>> Ruifeng Wang <Ruifeng.Wang@arm.com>
>>> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
>>> jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
>>> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
>>> Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
>>> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
>>> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
>>> Subject: Re: [EXT] Re: [PATCH] common/cnxk: use cas with release
>>> semantics for batch alloc
>>>
>>> On 1/11/2022 12:26 PM, Ashwin Sekhar Thalakalath Kottilveetil wrote:
>>>> CAS is compare and swap. CASL is compare and swap with release
>>> semantics.
>>>>
>>>
>>> What does 'release semantics' mean? What is functional difference in both?
>>
>> 'release semantics' is semantics in memory ordering for store operations.
>> It ensures store-store ordering.
>>
>> And some comments below.
>>>
>>>> But on CNXK platform, the functionality of CAS* instructions is
>>>> completely
>>> different when it is done to specific addresses. These APIs are meant
>>> for use for such special cases. These cannot be made ARM generic.
>>>>
>>>> Ashwin Sekhar T K
>>>>
>>>>> -----Original Message-----
>>>>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>>>>> Sent: Tuesday, January 11, 2022 5:42 PM
>>>>> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
>>>>> dev@dpdk.org; Honnappa Nagarahalli
>> <Honnappa.Nagarahalli@arm.com>;
>>>>> Ruifeng Wang (Arm Technology China) <Ruifeng.Wang@arm.com>
>>>>> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Jerin Jacob
>>>>> Kollanukkaran <jerinj@marvell.com>; Sunil Kumar Kori
>>>>> <skori@marvell.com>; Satha Koteswara Rao Kottidi
>>>>> <skoteshwar@marvell.com>; Pavan Nikhilesh Bhagavatula
>>>>> <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
>>>>> <kirankumark@marvell.com>; Satheesh Paul
>> <psatheesh@marvell.com>;
>>>>> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal
>> <gakhil@marvell.com>
>>>>> Subject: [EXT] Re: [PATCH] common/cnxk: use cas with release
>>>>> semantics for batch alloc
>>>>>
>>>>> External Email
>>>>>
>>>>> -------------------------------------------------------------------
>>>>> --
>>>>> - On 1/11/2022 12:08 PM, Ferruh Yigit wrote:
>>>>>> On 11/30/2021 5:45 AM, Ashwin Sekhar T K wrote:
>>>>>>> Before issuing the batch alloc, we clear the first word of cache
>>>>>>> lines so that NPA can update the status. Make sure that this line
>>>>>>> clear is flushed before the batch alloc is issued.
>>>>>>>
>>>>>>> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
>>>>>>> ---
>>>>>>>     drivers/common/cnxk/roc_io.h         | 12 ++++++++++++
>>>>>>>     drivers/common/cnxk/roc_io_generic.h |  9 +++++++++
>>>>>>>     drivers/common/cnxk/roc_npa.h        |  2 +-
>>>>>>>     3 files changed, 22 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/common/cnxk/roc_io.h
>>>>>>> b/drivers/common/cnxk/roc_io.h index fe5f7f46d0..4f15503c29
>>>>>>> 100644
>>>>>>> --- a/drivers/common/cnxk/roc_io.h
>>>>>>> +++ b/drivers/common/cnxk/roc_io.h
>>>>>>> @@ -78,6 +78,18 @@ roc_atomic64_cas(uint64_t compare, uint64_t
>>>>> swap,
>>>>>>> int64_t *ptr)
>>>>>>>         return compare;
>>>>>>>     }
>>>>>>> +static __plt_always_inline uint64_t roc_atomic64_casl(uint64_t
>>>>>>> +compare, uint64_t swap, int64_t *ptr) {
>>>>>>> +    asm volatile(PLT_CPU_FEATURE_PREAMBLE
>>>>>>> +             "casl %[compare], %[swap], [%[ptr]]\n"
>>>>>>> +             : [compare] "+r"(compare)
>>>>>>> +             : [swap] "r"(swap), [ptr] "r"(ptr)
>>>>>>> +             : "memory");
>>>>>>> +
>>>>>>
>>>>>> out of curiosity, what is the "cas with release semantics"?
>>>>>> briefly, what is the difference between 'cas' and 'casl'?
>>>>>
>>>>> + Honnappa & Ruifeng,
>>
>> Thanks Ferruh for adding me in this loop.
>>>>>
>>>>> Isn't this API Arm wide, instead of being cnxk specific?
>>>>> Does it make sense to make this API for arm and cnxk use from there?
>>
>> Yes, CAS operation can be used Arm wide.
>> Generally, CAS is available via __atomic_compare_exchange/_n() compiler
>> built-ins. This is the way we use atomic in DPDK. So there is no need to add
>> another generic API.
> 
> Just to make my comment more clear.
> For generic CAS operations, compiler built-ins can be used. No more API needed.
> Given the special usage of the instructions in CNXK, the inline assembly here is not intended to be a wrapper of
> generic CAS operation but rather an interface to other hardware function. It doesn't make sense to make it Arm wide.
> 

Got it. Thanks for clarification, I will continue with the set.
  
Ferruh Yigit Jan. 12, 2022, 9:21 a.m. UTC | #9
On 1/12/2022 3:01 AM, Ruifeng Wang wrote:
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@intel.com>
>> Sent: Tuesday, January 11, 2022 9:46 PM
>> To: Ashwin Sekhar Thalakalath Kottilveetil <asekhar@marvell.com>;
>> dev@dpdk.org; Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
>> Ruifeng Wang <Ruifeng.Wang@arm.com>
>> Cc: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>;
>> jerinj@marvell.com; Sunil Kumar Kori <skori@marvell.com>; Satha
>> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Pavan Nikhilesh
>> Bhagavatula <pbhagavatula@marvell.com>; Kiran Kumar Kokkilagadda
>> <kirankumark@marvell.com>; Satheesh Paul <psatheesh@marvell.com>;
>> Anoob Joseph <anoobj@marvell.com>; Akhil Goyal <gakhil@marvell.com>
>> Subject: Re: [EXT] Re: [PATCH] common/cnxk: use cas with release semantics
>> for batch alloc
>>
>> On 1/11/2022 12:26 PM, Ashwin Sekhar Thalakalath Kottilveetil wrote:
>>> CAS is compare and swap. CASL is compare and swap with release
>> semantics.
>>>
>>
>> What does 'release semantics' mean? What is functional difference in both?
> 
> 'release semantics' is semantics in memory ordering for store operations.
> It ensures store-store ordering.
> 

Thanks.
  

Patch

diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
index fe5f7f46d0..4f15503c29 100644
--- a/drivers/common/cnxk/roc_io.h
+++ b/drivers/common/cnxk/roc_io.h
@@ -78,6 +78,18 @@  roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
 	return compare;
 }
 
+static __plt_always_inline uint64_t
+roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
+{
+	asm volatile(PLT_CPU_FEATURE_PREAMBLE
+		     "casl %[compare], %[swap], [%[ptr]]\n"
+		     : [compare] "+r"(compare)
+		     : [swap] "r"(swap), [ptr] "r"(ptr)
+		     : "memory");
+
+	return compare;
+}
+
 static __plt_always_inline uint64_t
 roc_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 {
diff --git a/drivers/common/cnxk/roc_io_generic.h b/drivers/common/cnxk/roc_io_generic.h
index ceaa3a38d8..5f90835c09 100644
--- a/drivers/common/cnxk/roc_io_generic.h
+++ b/drivers/common/cnxk/roc_io_generic.h
@@ -41,6 +41,15 @@  roc_atomic64_cas(uint64_t compare, uint64_t swap, int64_t *ptr)
 	return compare;
 }
 
+static __plt_always_inline uint64_t
+roc_atomic64_casl(uint64_t compare, uint64_t swap, int64_t *ptr)
+{
+	PLT_SET_USED(swap);
+	PLT_SET_USED(ptr);
+
+	return compare;
+}
+
 static inline uint64_t
 roc_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 {
diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index 46350fdb48..19b9a9352c 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -218,7 +218,7 @@  roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
 	cmp.compare_s.dis_wait = dis_wait;
 	cmp.compare_s.count = num;
 
-	res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
+	res = roc_atomic64_casl(cmp.u, (uint64_t)buf, addr);
 	if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
 		return -1;