common/cnxk: enable lmtst burst for batch free

Message ID 20211204103046.3353646-1-asekhar@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series common/cnxk: enable lmtst burst for batch free |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS

Commit Message

Ashwin Sekhar T K Dec. 4, 2021, 10:30 a.m. UTC
  Use LMTST burst when more than 15 pointers are requested
to be freed.

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 drivers/common/cnxk/roc_npa.h | 86 +++++++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 18 deletions(-)
  

Comments

Jerin Jacob Jan. 19, 2022, 12:06 p.m. UTC | #1
On Sat, Dec 4, 2021 at 4:02 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Use lmtst burst when more than 15 pointers is requested
> to be freed.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>

Acked-by: Jerin Jacob <jerinj@marvell.com>
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

Changed the commit message to:

    common/cnxk: enable LMTST burst for batch free

    Use LMTST burst as a performance optimization when more than
    15 pointers are requested to be freed.

    Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
    Acked-by: Jerin Jacob <jerinj@marvell.com>

> ---
>  drivers/common/cnxk/roc_npa.h | 86 +++++++++++++++++++++++++++--------
>  1 file changed, 68 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
> index 46350fdb48..dfe6e5318f 100644
> --- a/drivers/common/cnxk/roc_npa.h
> +++ b/drivers/common/cnxk/roc_npa.h
> @@ -10,7 +10,8 @@
>
>  #define ROC_NPA_MAX_BLOCK_SZ              (128 * 1024)
>  #define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
> -#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15
> +#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15U
> +#define ROC_CN10K_NPA_BATCH_FREE_BURST_MAX 16U
>
>  /* This value controls how much of the present average resource level is used to
>   * calculate the new resource level.
> @@ -362,9 +363,6 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
>         volatile uint64_t *lmt_data;
>         unsigned int i;
>
> -       if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
> -               return;
> -
>         lmt_data = (uint64_t *)lmt_addr;
>
>         addr = roc_npa_aura_handle_to_base(aura_handle) +
> @@ -379,10 +377,8 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
>          * -----------------------------------------
>          */
>         free0 = roc_npa_aura_handle_to_aura(aura_handle);
> -       if (fabs)
> -               free0 |= (0x1UL << 63);
> -       if (num & 0x1)
> -               free0 |= (0x1UL << 32);
> +       free0 |= ((uint64_t)!!fabs << 63);
> +       free0 |= ((uint64_t)(num & 0x1) << 32);
>
>         /* tar_addr[4:6] is LMTST size-1 in units of 128b */
>         tar_addr = addr | ((num >> 1) << 4);
> @@ -396,23 +392,77 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
>  }
>
>  static inline void
> -roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
> -                          unsigned int num, const int fabs, uint64_t lmt_addr,
> -                          uint64_t lmt_id)
> +roc_npa_aura_batch_free_burst(uint64_t aura_handle, uint64_t const *buf,
> +                             unsigned int num, const int fabs,
> +                             uint64_t lmt_addr, uint64_t lmt_id)
>  {
> -       unsigned int chunk;
> +       uint64_t addr, tar_addr, free0, send_data, lmtline;
> +       uint64_t *lmt_data;
> +
> +       /* 63   52 51  20 19   7 6           4 3  0
> +        * ----------------------------------------
> +        * | RSVD | ADDR | RSVD | LMTST SZ(0) | 0 |
> +        * ----------------------------------------
> +        */
> +       addr = roc_npa_aura_handle_to_base(aura_handle) +
> +              NPA_LF_AURA_BATCH_FREE0;
> +       tar_addr = addr | (0x7 << 4);
> +
> +       /* 63   63 62  33 32       32 31  20 19    0
> +        * -----------------------------------------
> +        * | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
> +        * -----------------------------------------
> +        */
> +       free0 = roc_npa_aura_handle_to_aura(aura_handle);
> +       free0 |= ((uint64_t)!!fabs << 63);
> +       free0 |= (0x1UL << 32);
>
> +       /* Fill the lmt lines */
> +       lmt_data = (uint64_t *)lmt_addr;
> +       lmtline = 0;
>         while (num) {
> -               chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
> -                                     ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
> -                                     num;
> +               lmt_data[lmtline * 16] = free0;
> +               memcpy(&lmt_data[(lmtline * 16) + 1], buf,
> +                      ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS * sizeof(uint64_t));
> +               lmtline++;
> +               num -= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +               buf += ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +       }
>
> -               roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
> -                                       lmt_id);
> +       /* 63                           19 18  16 15   12 11  11 10      0
> +        * ---------------------------------------------------------------
> +        * | LMTST SZ(15) ... LMTST SZ(1) | Rsvd | CNTM1 | Rsvd | LMT_ID |
> +        * ---------------------------------------------------------------
> +        */
> +       send_data = lmt_id | ((lmtline - 1) << 12) | (0x1FFFFFFFFFFFUL << 19);
> +       roc_lmt_submit_steorl(send_data, tar_addr);
> +       plt_io_wmb();
> +}
>
> +static inline void
> +roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
> +                          unsigned int num, const int fabs, uint64_t lmt_addr,
> +                          uint64_t lmt_id)
> +{
> +       unsigned int max_burst, chunk, bnum;
> +
> +       max_burst = ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS *
> +                   ROC_CN10K_NPA_BATCH_FREE_BURST_MAX;
> +       bnum = num / ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +       bnum *= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +       num -= bnum;
> +
> +       while (bnum) {
> +               chunk = (bnum >= max_burst) ? max_burst : bnum;
> +               roc_npa_aura_batch_free_burst(aura_handle, buf, chunk, fabs,
> +                                             lmt_addr, lmt_id);
>                 buf += chunk;
> -               num -= chunk;
> +               bnum -= chunk;
>         }
> +
> +       if (num)
> +               roc_npa_aura_batch_free(aura_handle, buf, num, fabs, lmt_addr,
> +                                       lmt_id);
>  }
>
>  static inline unsigned int
> --
> 2.32.0
>
  

Patch

diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index 46350fdb48..dfe6e5318f 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -10,7 +10,8 @@ 
 
 #define ROC_NPA_MAX_BLOCK_SZ		   (128 * 1024)
 #define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
-#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15
+#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15U
+#define ROC_CN10K_NPA_BATCH_FREE_BURST_MAX 16U
 
 /* This value controls how much of the present average resource level is used to
  * calculate the new resource level.
@@ -362,9 +363,6 @@  roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
 	volatile uint64_t *lmt_data;
 	unsigned int i;
 
-	if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
-		return;
-
 	lmt_data = (uint64_t *)lmt_addr;
 
 	addr = roc_npa_aura_handle_to_base(aura_handle) +
@@ -379,10 +377,8 @@  roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
 	 * -----------------------------------------
 	 */
 	free0 = roc_npa_aura_handle_to_aura(aura_handle);
-	if (fabs)
-		free0 |= (0x1UL << 63);
-	if (num & 0x1)
-		free0 |= (0x1UL << 32);
+	free0 |= ((uint64_t)!!fabs << 63);
+	free0 |= ((uint64_t)(num & 0x1) << 32);
 
 	/* tar_addr[4:6] is LMTST size-1 in units of 128b */
 	tar_addr = addr | ((num >> 1) << 4);
@@ -396,23 +392,77 @@  roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
 }
 
 static inline void
-roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
-			   unsigned int num, const int fabs, uint64_t lmt_addr,
-			   uint64_t lmt_id)
+roc_npa_aura_batch_free_burst(uint64_t aura_handle, uint64_t const *buf,
+			      unsigned int num, const int fabs,
+			      uint64_t lmt_addr, uint64_t lmt_id)
 {
-	unsigned int chunk;
+	uint64_t addr, tar_addr, free0, send_data, lmtline;
+	uint64_t *lmt_data;
+
+	/* 63   52 51  20 19   7 6           4 3  0
+	 * ----------------------------------------
+	 * | RSVD | ADDR | RSVD | LMTST SZ(0) | 0 |
+	 * ----------------------------------------
+	 */
+	addr = roc_npa_aura_handle_to_base(aura_handle) +
+	       NPA_LF_AURA_BATCH_FREE0;
+	tar_addr = addr | (0x7 << 4);
+
+	/* 63   63 62  33 32       32 31  20 19    0
+	 * -----------------------------------------
+	 * | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
+	 * -----------------------------------------
+	 */
+	free0 = roc_npa_aura_handle_to_aura(aura_handle);
+	free0 |= ((uint64_t)!!fabs << 63);
+	free0 |= (0x1UL << 32);
 
+	/* Fill the lmt lines */
+	lmt_data = (uint64_t *)lmt_addr;
+	lmtline = 0;
 	while (num) {
-		chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
-				      ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
-				      num;
+		lmt_data[lmtline * 16] = free0;
+		memcpy(&lmt_data[(lmtline * 16) + 1], buf,
+		       ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS * sizeof(uint64_t));
+		lmtline++;
+		num -= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
+		buf += ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
+	}
 
-		roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
-					lmt_id);
+	/* 63                           19 18  16 15   12 11  11 10      0
+	 * ---------------------------------------------------------------
+	 * | LMTST SZ(15) ... LMTST SZ(1) | Rsvd | CNTM1 | Rsvd | LMT_ID |
+	 * ---------------------------------------------------------------
+	 */
+	send_data = lmt_id | ((lmtline - 1) << 12) | (0x1FFFFFFFFFFFUL << 19);
+	roc_lmt_submit_steorl(send_data, tar_addr);
+	plt_io_wmb();
+}
 
+static inline void
+roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
+			   unsigned int num, const int fabs, uint64_t lmt_addr,
+			   uint64_t lmt_id)
+{
+	unsigned int max_burst, chunk, bnum;
+
+	max_burst = ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS *
+		    ROC_CN10K_NPA_BATCH_FREE_BURST_MAX;
+	bnum = num / ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
+	bnum *= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
+	num -= bnum;
+
+	while (bnum) {
+		chunk = (bnum >= max_burst) ? max_burst : bnum;
+		roc_npa_aura_batch_free_burst(aura_handle, buf, chunk, fabs,
+					      lmt_addr, lmt_id);
 		buf += chunk;
-		num -= chunk;
+		bnum -= chunk;
 	}
+
+	if (num)
+		roc_npa_aura_batch_free(aura_handle, buf, num, fabs, lmt_addr,
+					lmt_id);
 }
 
 static inline unsigned int