[v3] common/cnxk: add new APIs for batch operations

Message ID 20230530091251.1040406-1-asekhar@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Series [v3] common/cnxk: add new APIs for batch operations

Checks

Context                    Check    Description
ci/checkpatch              success  coding style OK
ci/loongarch-compilation   success  Compilation OK
ci/loongarch-unit-testing  success  Unit Testing PASS
ci/Intel-compilation       success  Compilation OK
ci/intel-Testing           success  Testing PASS
ci/iol-testing             warning  apply patch failure

Commit Message

Ashwin Sekhar T K May 30, 2023, 9:12 a.m. UTC
  Add new APIs for counting and extracting allocated objects
from a single cache line in the batch alloc memory.

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 drivers/common/cnxk/roc_npa.h | 78 ++++++++++++++++++++++++++++++-----
 1 file changed, 67 insertions(+), 11 deletions(-)
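
For context, here is a minimal caller-side sketch of how the new per-cache-line
helpers fit together. It is illustrative only and not part of the patch: the
example_* function name, the umbrella roc_api.h include, the pointer counts and
the 10 us wait are assumptions.

#include "roc_api.h"

/* Drain one batch alloc cache line in two steps using the new helpers.
 * 'out' must have room for at least 16 pointers (one full cache line).
 */
static unsigned int
example_drain_one_line(uint64_t aura_handle, uint64_t *out)
{
	uint64_t line[ROC_ALIGN / sizeof(uint64_t)]
		__attribute__((aligned(ROC_ALIGN)));
	unsigned int avail, n, rem;

	/* The issue path no longer zeroes the status word, so clear it here. */
	line[0] = 0;

	if (roc_npa_aura_batch_alloc_issue(aura_handle, line, 16, 0, 0))
		return 0;

	/* Wait up to 10 us, then read how many pointers landed in the line. */
	avail = roc_npa_aura_batch_alloc_count_line(line, 10);
	if (avail == 0)
		return 0;

	/* Take at most 8 pointers now; status->count is adjusted so that the
	 * rest can be pulled out of the same cache line afterwards.
	 */
	n = roc_npa_aura_batch_alloc_extract_line(out, line, 8, &rem);
	if (rem)
		n += roc_npa_aura_batch_alloc_extract_line(out + n, line,
							   rem, &rem);

	return n;
}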
  

Comments

Jerin Jacob May 30, 2023, 4:51 p.m. UTC | #1
On Tue, May 30, 2023 at 2:43 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Add new APIs for counting and extracting allocated objects
> from a single cache line in the batch alloc memory.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>

Applied to dpdk-next-net-mrvl/for-next-net. Thanks


  

Patch

diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index e1e164499e..4ad5f044b5 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -209,7 +209,6 @@  roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
 			       unsigned int num, const int dis_wait,
 			       const int drop)
 {
-	unsigned int i;
 	int64_t *addr;
 	uint64_t res;
 	union {
@@ -220,10 +219,6 @@  roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
 	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
 		return -1;
 
-	/* Zero first word of every cache line */
-	for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
-		buf[i] = 0;
-
 	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
 			   NPA_LF_AURA_BATCH_ALLOC);
 	cmp.u = 0;
@@ -240,6 +235,9 @@  roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
 	return 0;
 }
 
+/*
+ * Wait for a batch alloc operation on a cache line to complete.
+ */
 static inline void
 roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
 {
@@ -255,6 +253,23 @@  roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
 			break;
 }
 
+/*
+ * Count the number of pointers in a single batch alloc cache line.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_count_line(uint64_t *line, unsigned int wait_us)
+{
+	struct npa_batch_alloc_status_s *status;
+
+	status = (struct npa_batch_alloc_status_s *)line;
+	roc_npa_batch_alloc_wait(line, wait_us);
+
+	return status->count;
+}
+
+/*
+ * Count the number of pointers in a sequence of batch alloc cache lines.
+ */
 static inline unsigned int
 roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
 			       unsigned int wait_us)
@@ -279,6 +294,40 @@  roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
 	return count;
 }
 
+/*
+ * Extract allocated pointers from a single batch alloc cache line. This API
+ * extracts only the requested number of pointers from the cache line and
+ * adjusts status->count so that a subsequent call to this API can extract
+ * the remaining pointers in the cache line appropriately.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_extract_line(uint64_t *buf, uint64_t *line,
+				      unsigned int num, unsigned int *rem)
+{
+	struct npa_batch_alloc_status_s *status;
+	unsigned int avail;
+
+	status = (struct npa_batch_alloc_status_s *)line;
+	roc_npa_batch_alloc_wait(line, 0);
+	avail = status->count;
+	num = avail > num ? num : avail;
+	if (num)
+		memcpy(buf, &line[avail - num], num * sizeof(uint64_t));
+	avail -= num;
+	if (avail == 0) {
+		/* Clear the lowest 7 bits of the first pointer */
+		buf[0] &= ~0x7FUL;
+		status->ccode = 0;
+	}
+	status->count = avail;
+	*rem = avail;
+
+	return num;
+}
+
+/*
+ * Extract all allocated pointers from a sequence of batch alloc cache lines.
+ */
 static inline unsigned int
 roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
 				 unsigned int num)
@@ -330,11 +379,15 @@  roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
 	}
 }
 
+/*
+ * Issue a batch alloc operation on a sequence of cache lines, wait for the
+ * batch alloc to complete and copy the pointers out into the user buffer.
+ */
 static inline unsigned int
 roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
-			    uint64_t *aligned_buf, unsigned int num,
-			    const int dis_wait, const int drop,
-			    const int partial)
+			    unsigned int num, uint64_t *aligned_buf,
+			    unsigned int aligned_buf_sz, const int dis_wait,
+			    const int drop, const int partial)
 {
 	unsigned int count, chunk, num_alloc;
 
@@ -344,9 +397,12 @@  roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
 
 	count = 0;
 	while (num) {
-		chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
-				      ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
-				      num;
+		/* Make sure that the pointers allocated fit into the cache
+		 * lines reserved.
+		 */
+		chunk = aligned_buf_sz / sizeof(uint64_t);
+		chunk = PLT_MIN(num, chunk);
+		chunk = PLT_MIN((int)chunk, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
 
 		if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
 						   chunk, dis_wait, drop))
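
Note the reworked roc_npa_aura_op_batch_alloc() signature above: num now
precedes aligned_buf, and the new aligned_buf_sz argument bounds each chunk so
that the allocated pointers always fit into the reserved cache lines. A minimal
caller-side sketch, where the scratch sizing, flag values and example_* name
are assumptions for illustration:

#include "roc_api.h"

/* Allocate up to 'num' pointers into 'objs' using a caller-provided scratch
 * area of whole cache lines; returns the number of pointers obtained.
 */
static unsigned int
example_bulk_alloc(uint64_t aura_handle, uint64_t *objs, unsigned int num)
{
	/* 32 cache lines of scratch; any multiple of ROC_ALIGN works. */
	uint64_t scratch[32 * ROC_ALIGN / sizeof(uint64_t)]
		__attribute__((aligned(ROC_ALIGN)));

	return roc_npa_aura_op_batch_alloc(aura_handle, objs, num,
					   scratch, sizeof(scratch),
					   0 /* dis_wait */, 0 /* drop */,
					   1 /* partial */);
}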