[v7,1/2] dma/cnxk: rework DMA driver

Message ID 20230909163709.4718-1-pbhagavatula@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [v7,1/2] dma/cnxk: rework DMA driver |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Pavan Nikhilesh Bhagavatula Sept. 9, 2023, 4:37 p.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

To use the mempool cache, use rte_mempool for the DMA chunk pool.
Move the mempool creation to device start to limit the number of
chunks allocated based on the total number of descriptors
configured across all the vchans.

Remove unnecessary state tracking flags as the library handles it
and add the `CNXK` prefix to driver macros.

Convert the log register macro for all cnxk drivers to
RTE_LOG_REGISTER_DEFAULT.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: 29324

 v7 Changes:
 - Fix checkpatch warnings.
 v6 Changes:
 - Rework device configuration and start logic.
 - Add CNXK prefix to driver macros.
 v5 Changes:
 - Use RTE_LOG_REGISTER_DEFAULT for registering logging.
 v4 Changes:
 - Fix clang build.
 v3 Changes:
 - Fix build.

 drivers/common/cnxk/roc_dpi.c      |  90 ++---------
 drivers/common/cnxk/roc_dpi.h      |  28 +---
 drivers/common/cnxk/roc_dpi_priv.h |   3 -
 drivers/common/cnxk/roc_platform.c |  21 +--
 drivers/common/cnxk/roc_platform.h |   2 +
 drivers/common/cnxk/version.map    |   1 +
 drivers/dma/cnxk/cnxk_dmadev.c     | 252 ++++++++++++++++-------------
 drivers/dma/cnxk/cnxk_dmadev.h     |  45 ++++--
 8 files changed, 203 insertions(+), 239 deletions(-)

--
2.25.1
  

Patch

diff --git a/drivers/common/cnxk/roc_dpi.c b/drivers/common/cnxk/roc_dpi.c
index 0e2f803077..c241168294 100644
--- a/drivers/common/cnxk/roc_dpi.c
+++ b/drivers/common/cnxk/roc_dpi.c
@@ -1,6 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(C) 2021 Marvell.
  */
+
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -52,17 +53,12 @@  roc_dpi_disable(struct roc_dpi *dpi)
 }

 int
-roc_dpi_configure(struct roc_dpi *roc_dpi)
+roc_dpi_configure(struct roc_dpi *roc_dpi, uint32_t chunk_sz, uint64_t aura, uint64_t chunk_base)
 {
 	struct plt_pci_device *pci_dev;
-	const struct plt_memzone *dpi_mz;
 	dpi_mbox_msg_t mbox_msg;
-	struct npa_pool_s pool;
-	struct npa_aura_s aura;
-	int rc, count, buflen;
-	uint64_t aura_handle;
-	plt_iova_t iova;
-	char name[32];
+	uint64_t reg;
+	int rc;

 	if (!roc_dpi) {
 		plt_err("roc_dpi is NULL");
@@ -70,79 +66,30 @@  roc_dpi_configure(struct roc_dpi *roc_dpi)
 	}

 	pci_dev = roc_dpi->pci_dev;
-	memset(&pool, 0, sizeof(struct npa_pool_s));
-	pool.nat_align = 1;
-
-	memset(&aura, 0, sizeof(aura));
-	rc = roc_npa_pool_create(&aura_handle, DPI_CMD_QUEUE_SIZE,
-				 DPI_CMD_QUEUE_BUFS, &aura, &pool, 0);
-	if (rc) {
-		plt_err("Failed to create NPA pool, err %d\n", rc);
-		return rc;
-	}
-
-	snprintf(name, sizeof(name), "dpimem%d:%d:%d:%d", pci_dev->addr.domain, pci_dev->addr.bus,
-		 pci_dev->addr.devid, pci_dev->addr.function);
-	buflen = DPI_CMD_QUEUE_SIZE * DPI_CMD_QUEUE_BUFS;
-	dpi_mz = plt_memzone_reserve_aligned(name, buflen, 0, DPI_CMD_QUEUE_SIZE);
-	if (dpi_mz == NULL) {
-		plt_err("dpi memzone reserve failed");
-		rc = -ENOMEM;
-		goto err1;
-	}
-
-	roc_dpi->mz = dpi_mz;
-	iova = dpi_mz->iova;
-	for (count = 0; count < DPI_CMD_QUEUE_BUFS; count++) {
-		roc_npa_aura_op_free(aura_handle, 0, iova);
-		iova += DPI_CMD_QUEUE_SIZE;
-	}
-
-	roc_dpi->chunk_base = (void *)roc_npa_aura_op_alloc(aura_handle, 0);
-	if (!roc_dpi->chunk_base) {
-		plt_err("Failed to alloc buffer from NPA aura");
-		rc = -ENOMEM;
-		goto err2;
-	}
-
-	roc_dpi->chunk_next = (void *)roc_npa_aura_op_alloc(aura_handle, 0);
-	if (!roc_dpi->chunk_next) {
-		plt_err("Failed to alloc buffer from NPA aura");
-		rc = -ENOMEM;
-		goto err2;
-	}

-	roc_dpi->aura_handle = aura_handle;
-	/* subtract 2 as they have already been alloc'ed above */
-	roc_dpi->pool_size_m1 = (DPI_CMD_QUEUE_SIZE >> 3) - 2;
+	roc_dpi_disable(roc_dpi);
+	reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR);
+	while (!(reg & BIT_ULL(63)))
+		reg = plt_read64(roc_dpi->rbase + DPI_VDMA_SADDR);

 	plt_write64(0x0, roc_dpi->rbase + DPI_VDMA_REQQ_CTL);
-	plt_write64(((uint64_t)(roc_dpi->chunk_base) >> 7) << 7,
-		    roc_dpi->rbase + DPI_VDMA_SADDR);
+	plt_write64(chunk_base, roc_dpi->rbase + DPI_VDMA_SADDR);
 	mbox_msg.u[0] = 0;
 	mbox_msg.u[1] = 0;
 	/* DPI PF driver expects vfid starts from index 0 */
 	mbox_msg.s.vfid = roc_dpi->vfid;
 	mbox_msg.s.cmd = DPI_QUEUE_OPEN;
-	mbox_msg.s.csize = DPI_CMD_QUEUE_SIZE;
-	mbox_msg.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
+	mbox_msg.s.csize = chunk_sz;
+	mbox_msg.s.aura = aura;
 	mbox_msg.s.sso_pf_func = idev_sso_pffunc_get();
 	mbox_msg.s.npa_pf_func = idev_npa_pffunc_get();

 	rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg,
 			    sizeof(dpi_mbox_msg_t));
-	if (rc < 0) {
+	if (rc < 0)
 		plt_err("Failed to send mbox message %d to DPI PF, err %d",
 			mbox_msg.s.cmd, rc);
-		goto err2;
-	}
-
-	return rc;

-err2:
-	plt_memzone_free(dpi_mz);
-err1:
-	roc_npa_pool_destroy(aura_handle);
 	return rc;
 }

@@ -153,11 +100,9 @@  roc_dpi_dev_init(struct roc_dpi *roc_dpi)
 	uint16_t vfid;

 	roc_dpi->rbase = pci_dev->mem_resource[0].addr;
-	vfid = ((pci_dev->addr.devid & 0x1F) << 3) |
-	       (pci_dev->addr.function & 0x7);
+	vfid = ((pci_dev->addr.devid & 0x1F) << 3) | (pci_dev->addr.function & 0x7);
 	vfid -= 1;
 	roc_dpi->vfid = vfid;
-	plt_spinlock_init(&roc_dpi->chunk_lock);

 	return 0;
 }
@@ -180,14 +125,9 @@  roc_dpi_dev_fini(struct roc_dpi *roc_dpi)
 	mbox_msg.s.vfid = roc_dpi->vfid;
 	mbox_msg.s.cmd = DPI_QUEUE_CLOSE;

-	rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg,
-			    sizeof(dpi_mbox_msg_t));
+	rc = send_msg_to_pf(&pci_dev->addr, (const char *)&mbox_msg, sizeof(dpi_mbox_msg_t));
 	if (rc < 0)
-		plt_err("Failed to send mbox message %d to DPI PF, err %d",
-			mbox_msg.s.cmd, rc);
-
-	roc_npa_pool_destroy(roc_dpi->aura_handle);
-	plt_memzone_free(roc_dpi->mz);
+		plt_err("Failed to send mbox message %d to DPI PF, err %d", mbox_msg.s.cmd, rc);

 	return rc;
 }
diff --git a/drivers/common/cnxk/roc_dpi.h b/drivers/common/cnxk/roc_dpi.h
index 2f061b07c5..4ebde5b8a6 100644
--- a/drivers/common/cnxk/roc_dpi.h
+++ b/drivers/common/cnxk/roc_dpi.h
@@ -5,41 +5,17 @@ 
 #ifndef _ROC_DPI_H_
 #define _ROC_DPI_H_

-struct roc_dpi_args {
-	uint8_t num_ssegs;
-	uint8_t num_dsegs;
-	uint8_t comp_type;
-	uint8_t direction;
-	uint8_t sdevice;
-	uint8_t ddevice;
-	uint8_t swap;
-	uint8_t use_lock : 1;
-	uint8_t tt : 7;
-	uint16_t func;
-	uint16_t grp;
-	uint32_t tag;
-	uint64_t comp_ptr;
-};
-
 struct roc_dpi {
-	/* Input parameters */
 	struct plt_pci_device *pci_dev;
-	/* End of Input parameters */
-	const struct plt_memzone *mz;
 	uint8_t *rbase;
 	uint16_t vfid;
-	uint16_t pool_size_m1;
-	uint16_t chunk_head;
-	uint64_t *chunk_base;
-	uint64_t *chunk_next;
-	uint64_t aura_handle;
-	plt_spinlock_t chunk_lock;
 } __plt_cache_aligned;

 int __roc_api roc_dpi_dev_init(struct roc_dpi *roc_dpi);
 int __roc_api roc_dpi_dev_fini(struct roc_dpi *roc_dpi);

-int __roc_api roc_dpi_configure(struct roc_dpi *dpi);
+int __roc_api roc_dpi_configure(struct roc_dpi *dpi, uint32_t chunk_sz, uint64_t aura,
+				uint64_t chunk_base);
 int __roc_api roc_dpi_enable(struct roc_dpi *dpi);
 int __roc_api roc_dpi_disable(struct roc_dpi *dpi);

diff --git a/drivers/common/cnxk/roc_dpi_priv.h b/drivers/common/cnxk/roc_dpi_priv.h
index 1fa1a715d3..518a3e7351 100644
--- a/drivers/common/cnxk/roc_dpi_priv.h
+++ b/drivers/common/cnxk/roc_dpi_priv.h
@@ -16,9 +16,6 @@ 
 #define DPI_REG_DUMP	0x3
 #define DPI_GET_REG_CFG 0x4

-#define DPI_CMD_QUEUE_SIZE 4096
-#define DPI_CMD_QUEUE_BUFS 1024
-
 typedef union dpi_mbox_msg_t {
 	uint64_t u[2];
 	struct dpi_mbox_message_s {
diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c
index f91b95ceab..a8a83a3723 100644
--- a/drivers/common/cnxk/roc_platform.c
+++ b/drivers/common/cnxk/roc_platform.c
@@ -60,14 +60,15 @@  roc_plt_init(void)
 	return 0;
 }

-RTE_LOG_REGISTER(cnxk_logtype_base, pmd.cnxk.base, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_mbox, pmd.cnxk.mbox, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_cpt, pmd.crypto.cnxk, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_ml, pmd.ml.cnxk, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_npa, pmd.mempool.cnxk, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_nix, pmd.net.cnxk, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_npc, pmd.net.cnxk.flow, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_sso, pmd.event.cnxk, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_tim, pmd.event.cnxk.timer, NOTICE);
-RTE_LOG_REGISTER(cnxk_logtype_tm, pmd.net.cnxk.tm, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_base, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_mbox, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_cpt, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_ml, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_npa, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_nix, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_npc, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_sso, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_tim, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_tm, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_dpi, NOTICE);
 RTE_LOG_REGISTER_DEFAULT(cnxk_logtype_ree, NOTICE);
diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
index 08f83aba12..dfd4da21b6 100644
--- a/drivers/common/cnxk/roc_platform.h
+++ b/drivers/common/cnxk/roc_platform.h
@@ -242,6 +242,7 @@  extern int cnxk_logtype_sso;
 extern int cnxk_logtype_tim;
 extern int cnxk_logtype_tm;
 extern int cnxk_logtype_ree;
+extern int cnxk_logtype_dpi;

 #define plt_err(fmt, args...)                                                  \
 	RTE_LOG(ERR, PMD, "%s():%u " fmt "\n", __func__, __LINE__, ##args)
@@ -270,6 +271,7 @@  extern int cnxk_logtype_ree;
 #define plt_tim_dbg(fmt, ...)	plt_dbg(tim, fmt, ##__VA_ARGS__)
 #define plt_tm_dbg(fmt, ...)	plt_dbg(tm, fmt, ##__VA_ARGS__)
 #define plt_ree_dbg(fmt, ...)	plt_dbg(ree, fmt, ##__VA_ARGS__)
+#define plt_dpi_dbg(fmt, ...)	plt_dbg(dpi, fmt, ##__VA_ARGS__)

 /* Datapath logs */
 #define plt_dp_err(fmt, args...)                                               \
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 8c71497df8..1540dfadf9 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -7,6 +7,7 @@  INTERNAL {
 	cnxk_ipsec_outb_roundup_byte;
 	cnxk_logtype_base;
 	cnxk_logtype_cpt;
+	cnxk_logtype_dpi;
 	cnxk_logtype_mbox;
 	cnxk_logtype_ml;
 	cnxk_logtype_nix;
diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index eec6a897e2..f58bb92dbc 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,18 +2,6 @@ 
  * Copyright (C) 2021 Marvell International Ltd.
  */

-#include <string.h>
-#include <unistd.h>
-
-#include <bus_pci_driver.h>
-#include <rte_common.h>
-#include <rte_dmadev.h>
-#include <rte_dmadev_pmd.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
-#include <rte_mempool.h>
-#include <rte_pci.h>
-
 #include <cnxk_dmadev.h>

 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -24,14 +12,14 @@  cnxk_dmadev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_inf
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
 	RTE_SET_USED(size);

-	dev_info->max_vchans = MAX_VCHANS_PER_QUEUE;
+	dev_info->max_vchans = CNXK_DPI_MAX_VCHANS_PER_QUEUE;
 	dev_info->nb_vchans = dpivf->num_vchans;
 	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_MEM_TO_DEV |
 			     RTE_DMA_CAPA_DEV_TO_MEM | RTE_DMA_CAPA_DEV_TO_DEV |
 			     RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_COPY_SG;
-	dev_info->max_desc = DPI_MAX_DESC;
-	dev_info->min_desc = DPI_MIN_DESC;
-	dev_info->max_sges = DPI_MAX_POINTER;
+	dev_info->max_desc = CNXK_DPI_MAX_DESC;
+	dev_info->min_desc = CNXK_DPI_MIN_DESC;
+	dev_info->max_sges = CNXK_DPI_MAX_POINTER;

 	return 0;
 }
@@ -48,7 +36,7 @@  cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 		num_vchans = dpivf->num_vchans;
 		i = 0;
 	} else {
-		if (vchan >= MAX_VCHANS_PER_QUEUE)
+		if (vchan >= CNXK_DPI_MAX_VCHANS_PER_QUEUE)
 			return -EINVAL;

 		num_vchans = vchan + 1;
@@ -57,7 +45,7 @@  cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)

 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt;
+		max_desc = dpi_conf->c_desc.max_cnt + 1;
 		if (dpi_conf->c_desc.compl_ptr) {
 			for (j = 0; j < max_desc; j++)
 				rte_free(dpi_conf->c_desc.compl_ptr[j]);
@@ -71,39 +59,62 @@  cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 }

 static int
-cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf, uint32_t conf_sz)
+cnxk_dmadev_chunk_pool_create(struct rte_dma_dev *dev, uint32_t nb_chunks, uint32_t chunk_sz)
 {
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
 	struct cnxk_dpi_vf_s *dpivf = NULL;
-	int rc = 0;
-
-	RTE_SET_USED(conf_sz);
+	int rc;

 	dpivf = dev->fp_obj->dev_private;
+	/* Create chunk pool. */
+	snprintf(pool_name, sizeof(pool_name), "cnxk_dma_chunk_pool%d", dev->data->dev_id);

-	/* Accept only number of vchans as config from application. */
-	if (!(dpivf->flag & CNXK_DPI_DEV_START)) {
-		/* After config function, vchan setup function has to be called.
-		 * Free up vchan memory if any, before configuring num_vchans.
-		 */
-		cnxk_dmadev_vchan_free(dpivf, RTE_DMA_ALL_VCHAN);
-		dpivf->num_vchans = conf->nb_vchans;
+	nb_chunks += (CNXK_DPI_POOL_MAX_CACHE_SZ * rte_lcore_count());
+	dpivf->chunk_pool = rte_mempool_create_empty(
+		pool_name, nb_chunks, chunk_sz, CNXK_DPI_POOL_MAX_CACHE_SZ, 0, rte_socket_id(), 0);
+
+	if (dpivf->chunk_pool == NULL) {
+		plt_err("Unable to create chunkpool.");
+		return -ENOMEM;
 	}

-	if (dpivf->flag & CNXK_DPI_DEV_CONFIG)
-		return rc;
+	rc = rte_mempool_set_ops_byname(dpivf->chunk_pool, rte_mbuf_platform_mempool_ops(), NULL);
+	if (rc < 0) {
+		plt_err("Unable to set chunkpool ops");
+		goto free;
+	}

-	rc = roc_dpi_configure(&dpivf->rdpi);
+	rc = rte_mempool_populate_default(dpivf->chunk_pool);
 	if (rc < 0) {
-		plt_err("DMA configure failed err = %d", rc);
-		goto done;
+		plt_err("Unable to set populate chunkpool.");
+		goto free;
 	}
+	dpivf->aura = roc_npa_aura_handle_to_aura(dpivf->chunk_pool->pool_id);

-	dpivf->flag |= CNXK_DPI_DEV_CONFIG;
+	return 0;

-done:
+free:
+	rte_mempool_free(dpivf->chunk_pool);
 	return rc;
 }

+static int
+cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf, uint32_t conf_sz)
+{
+	struct cnxk_dpi_vf_s *dpivf = NULL;
+
+	RTE_SET_USED(conf_sz);
+	dpivf = dev->fp_obj->dev_private;
+
+	/* After config function, vchan setup function has to be called.
+	 * Free up vchan memory if any, before configuring num_vchans.
+	 */
+	cnxk_dmadev_vchan_free(dpivf, RTE_DMA_ALL_VCHAN);
+	dpivf->num_vchans = conf->nb_vchans;
+
+	return 0;
+}
+
 static int
 cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 			const struct rte_dma_vchan_conf *conf, uint32_t conf_sz)
@@ -117,9 +128,6 @@  cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,

 	RTE_SET_USED(conf_sz);

-	if (dpivf->flag & CNXK_DPI_DEV_START)
-		return 0;
-
 	header->cn9k.pt = DPI_HDR_PT_ZBW_CA;

 	switch (conf->direction) {
@@ -163,8 +171,8 @@  cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (!rte_is_power_of_2(max_desc))
 		max_desc = rte_align32pow2(max_desc);

-	if (max_desc > DPI_MAX_DESC)
-		max_desc = DPI_MAX_DESC;
+	if (max_desc > CNXK_DPI_MAX_DESC)
+		max_desc = CNXK_DPI_MAX_DESC;

 	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
@@ -182,7 +190,7 @@  cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 			return -ENOMEM;
 		}

-		dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
 	}

 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
@@ -203,9 +211,6 @@  cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,

 	RTE_SET_USED(conf_sz);

-	if (dpivf->flag & CNXK_DPI_DEV_START)
-		return 0;
-
 	header->cn10k.pt = DPI_HDR_PT_ZBW_CA;

 	switch (conf->direction) {
@@ -249,8 +254,8 @@  cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (!rte_is_power_of_2(max_desc))
 		max_desc = rte_align32pow2(max_desc);

-	if (max_desc > DPI_MAX_DESC)
-		max_desc = DPI_MAX_DESC;
+	if (max_desc > CNXK_DPI_MAX_DESC)
+		max_desc = CNXK_DPI_MAX_DESC;

 	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
@@ -267,7 +272,8 @@  cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 			plt_err("Failed to allocate for descriptor memory");
 			return -ENOMEM;
 		}
-		dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+
+		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
 	}

 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
@@ -280,10 +286,9 @@  cnxk_dmadev_start(struct rte_dma_dev *dev)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
 	struct cnxk_dpi_conf *dpi_conf;
-	int i, j;
-
-	if (dpivf->flag & CNXK_DPI_DEV_START)
-		return 0;
+	uint32_t chunks, nb_desc = 0;
+	int i, j, rc = 0;
+	void *chunk;

 	for (i = 0; i < dpivf->num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
@@ -292,29 +297,61 @@  cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt; j++) {
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
 			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = DPI_REQ_CDATA;
+				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
 		}
-
+		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
 	}

-	roc_dpi_enable(&dpivf->rdpi);
+	chunks = CNXK_DPI_CHUNKS_FROM_DESC(CNXK_DPI_QUEUE_BUF_SIZE, nb_desc);
+	rc = cnxk_dmadev_chunk_pool_create(dev, chunks, CNXK_DPI_QUEUE_BUF_SIZE);
+	if (rc < 0) {
+		plt_err("DMA pool configure failed err = %d", rc);
+		goto done;
+	}

-	dpivf->flag |= CNXK_DPI_DEV_START;
+	rc = rte_mempool_get(dpivf->chunk_pool, &chunk);
+	if (rc < 0) {
+		plt_err("DMA failed to get chunk pointer err = %d", rc);
+		rte_mempool_free(dpivf->chunk_pool);
+		goto done;
+	}

-	return 0;
+	rc = roc_dpi_configure(&dpivf->rdpi, CNXK_DPI_QUEUE_BUF_SIZE, dpivf->aura, (uint64_t)chunk);
+	if (rc < 0) {
+		plt_err("DMA configure failed err = %d", rc);
+		rte_mempool_free(dpivf->chunk_pool);
+		goto done;
+	}
+
+	dpivf->chunk_base = chunk;
+	dpivf->chunk_head = 0;
+	dpivf->chunk_size_m1 = (CNXK_DPI_QUEUE_BUF_SIZE >> 3) - 2;
+
+	roc_dpi_enable(&dpivf->rdpi);
+
+done:
+	return rc;
 }

 static int
 cnxk_dmadev_stop(struct rte_dma_dev *dev)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
+	uint64_t reg;
+
+	reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR);
+	while (!(reg & BIT_ULL(63)))
+		reg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR);

 	roc_dpi_disable(&dpivf->rdpi);
-	dpivf->flag &= ~CNXK_DPI_DEV_START;
+	rte_mempool_free(dpivf->chunk_pool);
+	dpivf->chunk_pool = NULL;
+	dpivf->chunk_base = NULL;
+	dpivf->chunk_size_m1 = 0;

 	return 0;
 }
@@ -335,7 +372,7 @@  cnxk_dmadev_close(struct rte_dma_dev *dev)
 }

 static inline int
-__dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
+__dpi_queue_write(struct cnxk_dpi_vf_s *dpi, uint64_t *cmds, int cmd_count)
 {
 	uint64_t *ptr = dpi->chunk_base;

@@ -346,31 +383,25 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 	 * Normally there is plenty of room in the current buffer for the
 	 * command
 	 */
-	if (dpi->chunk_head + cmd_count < dpi->pool_size_m1) {
+	if (dpi->chunk_head + cmd_count < dpi->chunk_size_m1) {
 		ptr += dpi->chunk_head;
 		dpi->chunk_head += cmd_count;
 		while (cmd_count--)
 			*ptr++ = *cmds++;
 	} else {
+		uint64_t *new_buff = NULL;
 		int count;
-		uint64_t *new_buff = dpi->chunk_next;
-
-		dpi->chunk_next = (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
-		if (!dpi->chunk_next) {
-			plt_dp_dbg("Failed to alloc next buffer from NPA");

-			/* NPA failed to allocate a buffer. Restoring chunk_next
-			 * to its original address.
-			 */
-			dpi->chunk_next = new_buff;
-			return -ENOSPC;
+		if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {
+			plt_dpi_dbg("Failed to alloc next buffer from NPA");
+			return -ENOMEM;
 		}

 		/*
 		 * Figure out how many cmd words will fit in this buffer.
 		 * One location will be needed for the next buffer pointer.
 		 */
-		count = dpi->pool_size_m1 - dpi->chunk_head;
+		count = dpi->chunk_size_m1 - dpi->chunk_head;
 		ptr += dpi->chunk_head;
 		cmd_count -= count;
 		while (count--)
@@ -395,17 +426,10 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 			*ptr++ = *cmds++;

 		/* queue index may be greater than pool size */
-		if (dpi->chunk_head >= dpi->pool_size_m1) {
-			new_buff = dpi->chunk_next;
-			dpi->chunk_next = (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
-			if (!dpi->chunk_next) {
-				plt_dp_dbg("Failed to alloc next buffer from NPA");
-
-				/* NPA failed to allocate a buffer. Restoring chunk_next
-				 * to its original address.
-				 */
-				dpi->chunk_next = new_buff;
-				return -ENOSPC;
+		if (dpi->chunk_head == dpi->chunk_size_m1) {
+			if (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {
+				plt_dpi_dbg("Failed to alloc next buffer from NPA");
+				return -ENOMEM;
 			}

 			/* Write next buffer address */
@@ -433,10 +457,10 @@  cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d

 	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
 	header->cn9k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpi_conf->c_desc, tail);
+	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);

 	if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return -ENOSPC;
 	}

@@ -465,9 +489,9 @@  cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	cmd[num_words++] = length;
 	cmd[num_words++] = lptr;

-	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	rc = __dpi_queue_write(dpivf, cmd, num_words);
 	if (unlikely(rc)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return rc;
 	}

@@ -498,10 +522,10 @@  cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge

 	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
 	header->cn9k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpi_conf->c_desc, tail);
+	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);

 	if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return -ENOSPC;
 	}

@@ -510,13 +534,13 @@  cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	 * For all other cases, src pointers are first pointers.
 	 */
 	if (header->cn9k.xtype == DPI_XTYPE_INBOUND) {
-		header->cn9k.nfst = nb_dst & DPI_MAX_POINTER;
-		header->cn9k.nlst = nb_src & DPI_MAX_POINTER;
+		header->cn9k.nfst = nb_dst & CNXK_DPI_MAX_POINTER;
+		header->cn9k.nlst = nb_src & CNXK_DPI_MAX_POINTER;
 		fptr = &dst[0];
 		lptr = &src[0];
 	} else {
-		header->cn9k.nfst = nb_src & DPI_MAX_POINTER;
-		header->cn9k.nlst = nb_dst & DPI_MAX_POINTER;
+		header->cn9k.nfst = nb_src & CNXK_DPI_MAX_POINTER;
+		header->cn9k.nlst = nb_dst & CNXK_DPI_MAX_POINTER;
 		fptr = &src[0];
 		lptr = &dst[0];
 	}
@@ -537,9 +561,9 @@  cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		lptr++;
 	}

-	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	rc = __dpi_queue_write(dpivf, cmd, num_words);
 	if (unlikely(rc)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return rc;
 	}

@@ -570,10 +594,10 @@  cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t

 	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
 	header->cn10k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpi_conf->c_desc, tail);
+	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);

 	if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return -ENOSPC;
 	}

@@ -593,9 +617,9 @@  cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	cmd[num_words++] = length;
 	cmd[num_words++] = lptr;

-	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	rc = __dpi_queue_write(dpivf, cmd, num_words);
 	if (unlikely(rc)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return rc;
 	}

@@ -627,15 +651,15 @@  cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge

 	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
 	header->cn10k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpi_conf->c_desc, tail);
+	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);

 	if (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return -ENOSPC;
 	}

-	header->cn10k.nfst = nb_src & DPI_MAX_POINTER;
-	header->cn10k.nlst = nb_dst & DPI_MAX_POINTER;
+	header->cn10k.nfst = nb_src & CNXK_DPI_MAX_POINTER;
+	header->cn10k.nlst = nb_dst & CNXK_DPI_MAX_POINTER;
 	fptr = &src[0];
 	lptr = &dst[0];

@@ -656,9 +680,9 @@  cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		lptr++;
 	}

-	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	rc = __dpi_queue_write(dpivf, cmd, num_words);
 	if (unlikely(rc)) {
-		STRM_DEC(dpi_conf->c_desc, tail);
+		CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
 		return rc;
 	}

@@ -688,16 +712,16 @@  cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 		comp_ptr = c_desc->compl_ptr[c_desc->head];

 		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == DPI_REQ_CDATA)
+			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
-			STRM_INC(*c_desc, head);
+			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}

-		comp_ptr->cdata = DPI_REQ_CDATA;
-		STRM_INC(*c_desc, head);
+		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		CNXK_DPI_STRM_INC(*c_desc, head);
 	}

 	dpi_conf->stats.completed += cnt;
@@ -720,13 +744,13 @@  cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 		comp_ptr = c_desc->compl_ptr[c_desc->head];
 		status[cnt] = comp_ptr->cdata;
 		if (status[cnt]) {
-			if (status[cnt] == DPI_REQ_CDATA)
+			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;

 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = DPI_REQ_CDATA;
-		STRM_INC(*c_desc, head);
+		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		CNXK_DPI_STRM_INC(*c_desc, head);
 	}

 	dpi_conf->stats.completed += cnt;
@@ -794,7 +818,7 @@  cnxk_stats_get(const struct rte_dma_dev *dev, uint16_t vchan, struct rte_dma_sta
 		goto done;
 	}

-	if (vchan >= MAX_VCHANS_PER_QUEUE)
+	if (vchan >= CNXK_DPI_MAX_VCHANS_PER_QUEUE)
 		return -EINVAL;

 	dpi_conf = &dpivf->conf[vchan];
@@ -822,7 +846,7 @@  cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
 		return 0;
 	}

-	if (vchan >= MAX_VCHANS_PER_QUEUE)
+	if (vchan >= CNXK_DPI_MAX_VCHANS_PER_QUEUE)
 		return -EINVAL;

 	dpi_conf = &dpivf->conf[vchan];
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 254e7fea20..d691f5fba2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -4,22 +4,38 @@ 
 #ifndef CNXK_DMADEV_H
 #define CNXK_DMADEV_H

+#include <string.h>
+#include <unistd.h>
+
+#include <bus_pci_driver.h>
+#include <rte_common.h>
+#include <rte_dmadev.h>
+#include <rte_dmadev_pmd.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_mbuf_pool_ops.h>
+#include <rte_mempool.h>
+#include <rte_pci.h>
+
 #include <roc_api.h>

-#define DPI_MAX_POINTER	     15
-#define STRM_INC(s, var)     ((s).var = ((s).var + 1) & (s).max_cnt)
-#define STRM_DEC(s, var)     ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1))
-#define DPI_MAX_DESC	     2048
-#define DPI_MIN_DESC	     2
-#define MAX_VCHANS_PER_QUEUE 4
+#define CNXK_DPI_MAX_POINTER		    15
+#define CNXK_DPI_STRM_INC(s, var)	    ((s).var = ((s).var + 1) & (s).max_cnt)
+#define CNXK_DPI_STRM_DEC(s, var)	    ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1))
+#define CNXK_DPI_MAX_DESC		    32768
+#define CNXK_DPI_MIN_DESC		    2
+#define CNXK_DPI_MAX_VCHANS_PER_QUEUE	    4
+#define CNXK_DPI_QUEUE_BUF_SIZE		    16256
+#define CNXK_DPI_POOL_MAX_CACHE_SZ	    (16)
+#define CNXK_DPI_HDR_LEN		    4
+#define CNXK_DPI_CMD_LEN(src, dst)	    (CNXK_DPI_HDR_LEN + (src << 1) + (dst << 1))
+#define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER, CNXK_DPI_MAX_POINTER)
+#define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (desc / ((cz / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1

 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
-#define DPI_REQ_CDATA 0xFF
-
-#define CNXK_DPI_DEV_CONFIG (1ULL << 0)
-#define CNXK_DPI_DEV_START  (1ULL << 1)
+#define CNXK_DPI_REQ_CDATA 0xFF

 struct cnxk_dpi_compl_s {
 	uint64_t cdata;
@@ -45,8 +61,15 @@  struct cnxk_dpi_conf {
 };

 struct cnxk_dpi_vf_s {
+	/* Fast path */
+	uint64_t *chunk_base;
+	uint16_t chunk_head;
+	uint16_t chunk_size_m1;
+	struct rte_mempool *chunk_pool;
+	struct cnxk_dpi_conf conf[CNXK_DPI_MAX_VCHANS_PER_QUEUE];
+	/* Slow path */
 	struct roc_dpi rdpi;
-	struct cnxk_dpi_conf conf[MAX_VCHANS_PER_QUEUE];
+	uint32_t aura;
 	uint16_t num_vchans;
 	uint16_t flag;
 } __plt_cache_aligned;