[v2,09/30] dma/dpaa2: add short FD support

Message ID 20240722115843.1830105-10-g.singh@nxp.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series NXP DMA driver fixes and Enhancements |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Gagandeep Singh July 22, 2024, 11:58 a.m. UTC
From: Jun Yang <jun.yang@nxp.com>

Short FD can be used for single transfer scenario which shows higher
performance than FLE.
1) Save index context in FD att field for short and FLE(NonSG).
2) Identify FD type by att of FD.
3) Force 48 bits address for source address and fle according to spec.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/dma/dpaa2/dpaa2_qdma.c         | 314 +++++++++++++++++++------
 drivers/dma/dpaa2/dpaa2_qdma.h         |  69 ++++--
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h |  13 -
 3 files changed, 285 insertions(+), 111 deletions(-)
  

Patch

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 53caccecd7..d1358b686c 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -522,7 +522,6 @@  dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
 	const struct qdma_cntx_fle_sdd *fle_sdd;
 	const struct qdma_sdd *sdd;
 	const struct qdma_cntx_sg *cntx_sg = NULL;
-	const struct qdma_cntx_long *cntx_long = NULL;
 
 	fle_sdd = container_of(fle, const struct qdma_cntx_fle_sdd, fle[0]);
 	sdd = fle_sdd->sdd;
@@ -545,11 +544,8 @@  dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
 		QBMAN_FLE_WORD4_FMT_SGE) {
 		cntx_sg = container_of(fle_sdd, const struct qdma_cntx_sg,
 			fle_sdd);
-	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt !=
 		QBMAN_FLE_WORD4_FMT_SBF) {
-		cntx_long = container_of(fle_sdd, const struct qdma_cntx_long,
-			fle_sdd);
-	} else {
 		DPAA2_QDMA_ERR("Unsupported fle format:%d",
 			fle[DPAA2_QDMA_SRC_FLE].word4.fmt);
 		return;
@@ -560,11 +556,6 @@  dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
 		dpaa2_qdma_sdd_dump(&sdd[i]);
 	}
 
-	if (cntx_long) {
-		DPAA2_QDMA_INFO("long format/Single buffer cntx idx:%d",
-			cntx_long->cntx_idx);
-	}
-
 	if (cntx_sg) {
 		DPAA2_QDMA_INFO("long format/SG format, job number:%d",
 			cntx_sg->job_nb);
@@ -582,6 +573,8 @@  dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
 			DPAA2_QDMA_INFO("cntx_idx[%d]:%d", i,
 				cntx_sg->cntx_idx[i]);
 		}
+	} else {
+		DPAA2_QDMA_INFO("long format/Single buffer cntx");
 	}
 }
 
@@ -644,7 +637,7 @@  dpaa2_qdma_copy_sg(void *dev_private,
 		offsetof(struct qdma_cntx_sg, fle_sdd) +
 		offsetof(struct qdma_cntx_fle_sdd, fle);
 
-	DPAA2_SET_FD_ADDR(fd, fle_iova);
+	dpaa2_qdma_fd_set_addr(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
 	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_sg);
 
@@ -680,6 +673,7 @@  dpaa2_qdma_copy_sg(void *dev_private,
 	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
 		dpaa2_qdma_long_fmt_dump(cntx_sg->fle_sdd.fle);
 
+	dpaa2_qdma_fd_save_att(fd, 0, DPAA2_QDMA_FD_SG);
 	qdma_vq->fd_idx++;
 	qdma_vq->silent_idx =
 		(qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1);
@@ -696,74 +690,178 @@  dpaa2_qdma_copy_sg(void *dev_private,
 	return ret;
 }
 
+static inline void
+qdma_populate_fd_pci(uint64_t src, uint64_t dest,
+	uint32_t len, struct qbman_fd *fd,
+	struct dpaa2_qdma_rbp *rbp, int ser)
+{
+	fd->simple_pci.saddr_lo = lower_32_bits(src);
+	fd->simple_pci.saddr_hi = upper_32_bits(src);
+
+	fd->simple_pci.len_sl = len;
+
+	fd->simple_pci.bmt = DPAA2_QDMA_BMT_DISABLE;
+	fd->simple_pci.fmt = DPAA2_QDMA_FD_SHORT_FORMAT;
+	fd->simple_pci.sl = 1;
+	fd->simple_pci.ser = ser;
+	if (ser)
+		fd->simple.frc |= QDMA_SER_CTX;
+
+	fd->simple_pci.sportid = rbp->sportid;
+
+	fd->simple_pci.svfid = rbp->svfid;
+	fd->simple_pci.spfid = rbp->spfid;
+	fd->simple_pci.svfa = rbp->svfa;
+	fd->simple_pci.dvfid = rbp->dvfid;
+	fd->simple_pci.dpfid = rbp->dpfid;
+	fd->simple_pci.dvfa = rbp->dvfa;
+
+	fd->simple_pci.srbp = rbp->srbp;
+	if (rbp->srbp)
+		fd->simple_pci.rdttype = 0;
+	else
+		fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache;
+
+	/*dest is pcie memory */
+	fd->simple_pci.dportid = rbp->dportid;
+	fd->simple_pci.drbp = rbp->drbp;
+	if (rbp->drbp)
+		fd->simple_pci.wrttype = 0;
+	else
+		fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache;
+
+	fd->simple_pci.daddr_lo = lower_32_bits(dest);
+	fd->simple_pci.daddr_hi = upper_32_bits(dest);
+}
+
+static inline void
+qdma_populate_fd_ddr(uint64_t src, uint64_t dest,
+	uint32_t len, struct qbman_fd *fd, int ser)
+{
+	fd->simple_ddr.saddr_lo = lower_32_bits(src);
+	fd->simple_ddr.saddr_hi = upper_32_bits(src);
+
+	fd->simple_ddr.len = len;
+
+	fd->simple_ddr.bmt = DPAA2_QDMA_BMT_DISABLE;
+	fd->simple_ddr.fmt = DPAA2_QDMA_FD_SHORT_FORMAT;
+	fd->simple_ddr.sl = 1;
+	fd->simple_ddr.ser = ser;
+	if (ser)
+		fd->simple.frc |= QDMA_SER_CTX;
+	/**
+	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
+	 * Coherent copy of cacheable memory,
+	 * lookup in downstream cache, no allocate
+	 * on miss.
+	 */
+	fd->simple_ddr.rns = 0;
+	fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache;
+	/**
+	 * dest If RBP=0 {NS,WRTTYPE[3:0]}: 0_0111
+	 * Coherent write of cacheable memory,
+	 * lookup in downstream cache, no allocate on miss
+	 */
+	fd->simple_ddr.wns = 0;
+	fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache;
+
+	fd->simple_ddr.daddr_lo = lower_32_bits(dest);
+	fd->simple_ddr.daddr_hi = upper_32_bits(dest);
+}
+
 static int
-dpaa2_qdma_copy(void *dev_private, uint16_t vchan,
-	rte_iova_t src, rte_iova_t dst,
-	uint32_t length, uint64_t flags)
+dpaa2_qdma_short_copy(struct qdma_virt_queue *qdma_vq,
+	rte_iova_t src, rte_iova_t dst, uint32_t length,
+	int is_silent, uint64_t flags)
 {
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
 	int ret = 0, expected;
 	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
-	struct qdma_cntx_long *cntx_long = NULL;
-	rte_iova_t cntx_iova, fle_iova, sdd_iova;
+
+	memset(fd, 0, sizeof(struct qbman_fd));
+
+	if (qdma_vq->rbp.drbp || qdma_vq->rbp.srbp) {
+		/** PCIe EP*/
+		qdma_populate_fd_pci(src,
+			dst, length,
+			fd, &qdma_vq->rbp,
+			is_silent ? 0 : 1);
+	} else {
+		/** DDR or PCIe RC*/
+		qdma_populate_fd_ddr(src,
+			dst, length,
+			fd, is_silent ? 0 : 1);
+	}
+	dpaa2_qdma_fd_save_att(fd, DPAA2_QDMA_IDX_FROM_FLAG(flags),
+		DPAA2_QDMA_FD_SHORT);
+	qdma_vq->fd_idx++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		expected = qdma_vq->fd_idx;
+		ret = dpaa2_qdma_multi_eq(qdma_vq);
+		if (likely(ret == expected)) {
+			qdma_vq->copy_num++;
+			return (qdma_vq->copy_num - 1) & UINT16_MAX;
+		}
+	} else {
+		qdma_vq->copy_num++;
+		return (qdma_vq->copy_num - 1) & UINT16_MAX;
+	}
+
+	return ret;
+}
+
+static int
+dpaa2_qdma_long_copy(struct qdma_virt_queue *qdma_vq,
+	rte_iova_t src, rte_iova_t dst, uint32_t length,
+	int is_silent, uint64_t flags)
+{
+	int ret = 0, expected;
+	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
+	struct qdma_cntx_fle_sdd *fle_sdd = NULL;
+	rte_iova_t fle_iova, sdd_iova;
 	struct qbman_fle *fle;
 	struct qdma_sdd *sdd;
 
 	memset(fd, 0, sizeof(struct qbman_fd));
 
-	if (qdma_dev->is_silent) {
-		cntx_long = qdma_vq->cntx_long[qdma_vq->silent_idx];
+	if (is_silent) {
+		fle_sdd = qdma_vq->cntx_fle_sdd[qdma_vq->silent_idx];
 	} else {
 		ret = rte_mempool_get(qdma_vq->fle_pool,
-			(void **)&cntx_long);
+			(void **)&fle_sdd);
 		if (ret)
 			return ret;
 		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
-		cntx_long->cntx_idx = DPAA2_QDMA_IDX_FROM_FLAG(flags);
 	}
 
-#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	cntx_iova = rte_mempool_virt2iova(cntx_long);
-#else
-	cntx_iova = DPAA2_VADDR_TO_IOVA(cntx_long);
-#endif
-
-	fle = cntx_long->fle_sdd.fle;
-	fle_iova = cntx_iova +
-		offsetof(struct qdma_cntx_long, fle_sdd) +
-		offsetof(struct qdma_cntx_fle_sdd, fle);
+	fle = fle_sdd->fle;
+	fle_iova = (uint64_t)fle - qdma_vq->fle_iova2va_offset;
 
-	DPAA2_SET_FD_ADDR(fd, fle_iova);
+	dpaa2_qdma_fd_set_addr(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
-	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_long);
+	DPAA2_SET_FD_FLC(fd, (uint64_t)fle);
 
 	if (qdma_vq->fle_pre_populate) {
 		if (unlikely(!fle[DPAA2_QDMA_SRC_FLE].length)) {
-			fle_sdd_pre_populate(&cntx_long->fle_sdd,
+			fle_sdd_pre_populate(fle_sdd,
 				&qdma_vq->rbp,
 				0, 0, QBMAN_FLE_WORD4_FMT_SBF);
-			if (!qdma_dev->is_silent && cntx_long) {
-				cntx_long->cntx_idx =
-					DPAA2_QDMA_IDX_FROM_FLAG(flags);
-			}
 		}
 
 		fle_post_populate(fle, src, dst, length);
 	} else {
-		sdd = cntx_long->fle_sdd.sdd;
-		sdd_iova = cntx_iova +
-			offsetof(struct qdma_cntx_long, fle_sdd) +
-			offsetof(struct qdma_cntx_fle_sdd, sdd);
+		sdd = fle_sdd->sdd;
+		sdd_iova = (uint64_t)sdd - qdma_vq->fle_iova2va_offset;
 		fle_populate(fle, sdd, sdd_iova, &qdma_vq->rbp,
 			src, dst, length,
 			QBMAN_FLE_WORD4_FMT_SBF);
 	}
 
 	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
-		dpaa2_qdma_long_fmt_dump(cntx_long->fle_sdd.fle);
+		dpaa2_qdma_long_fmt_dump(fle);
 
+	dpaa2_qdma_fd_save_att(fd, DPAA2_QDMA_IDX_FROM_FLAG(flags),
+		DPAA2_QDMA_FD_LONG);
 	qdma_vq->fd_idx++;
 	qdma_vq->silent_idx =
 		(qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1);
@@ -771,15 +869,89 @@  dpaa2_qdma_copy(void *dev_private, uint16_t vchan,
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
 		expected = qdma_vq->fd_idx;
 		ret = dpaa2_qdma_multi_eq(qdma_vq);
-		if (likely(ret == expected))
-			return 0;
+		if (likely(ret == expected)) {
+			qdma_vq->copy_num++;
+			return (qdma_vq->copy_num - 1) & UINT16_MAX;
+		}
 	} else {
-		return 0;
+		qdma_vq->copy_num++;
+		return (qdma_vq->copy_num - 1) & UINT16_MAX;
 	}
 
 	return ret;
 }
 
+static int
+dpaa2_qdma_copy(void *dev_private, uint16_t vchan,
+	rte_iova_t src, rte_iova_t dst,
+	uint32_t length, uint64_t flags)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+
+	if (qdma_vq->using_short_fd)
+		return dpaa2_qdma_short_copy(qdma_vq, src, dst,
+				length, qdma_dev->is_silent, flags);
+	else
+		return dpaa2_qdma_long_copy(qdma_vq, src, dst,
+				length, qdma_dev->is_silent, flags);
+}
+
+static inline int
+dpaa2_qdma_dq_fd(const struct qbman_fd *fd,
+	struct qdma_virt_queue *qdma_vq,
+	uint16_t *free_space, uint16_t *fle_elem_nb)
+{
+	uint16_t idx, att;
+	enum dpaa2_qdma_fd_type type;
+	int ret;
+	struct qdma_cntx_sg *cntx_sg;
+	struct qdma_cntx_fle_sdd *fle_sdd;
+
+	att = dpaa2_qdma_fd_get_att(fd);
+	type = DPAA2_QDMA_FD_ATT_TYPE(att);
+	if (type == DPAA2_QDMA_FD_SHORT) {
+		idx = DPAA2_QDMA_FD_ATT_CNTX(att);
+		ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				&idx, 1, free_space);
+		if (unlikely(ret != 1))
+			return -ENOSPC;
+
+		return 0;
+	}
+	if (type == DPAA2_QDMA_FD_LONG) {
+		idx = DPAA2_QDMA_FD_ATT_CNTX(att);
+		fle_sdd = (void *)(uintptr_t)DPAA2_GET_FD_FLC(fd);
+		qdma_vq->fle_elem[*fle_elem_nb] = fle_sdd;
+		(*fle_elem_nb)++;
+		ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				&idx, 1, free_space);
+		if (unlikely(ret != 1))
+			return -ENOSPC;
+
+		return 0;
+	}
+	if (type == DPAA2_QDMA_FD_SG) {
+		fle_sdd = (void *)(uintptr_t)DPAA2_GET_FD_FLC(fd);
+		qdma_vq->fle_elem[*fle_elem_nb] = fle_sdd;
+		(*fle_elem_nb)++;
+		cntx_sg = container_of(fle_sdd,
+				struct qdma_cntx_sg, fle_sdd);
+		ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				cntx_sg->cntx_idx,
+				cntx_sg->job_nb, free_space);
+		if (unlikely(ret < cntx_sg->job_nb))
+			return -ENOSPC;
+
+		return 0;
+	}
+
+	DPAA2_QDMA_ERR("Invalid FD type, ATT=0x%04x",
+		fd->simple_ddr.rsv1_att);
+	return -EIO;
+}
+
 static uint16_t
 dpaa2_qdma_dequeue(void *dev_private,
 	uint16_t vchan, const uint16_t nb_cpls,
@@ -799,10 +971,6 @@  dpaa2_qdma_dequeue(void *dev_private,
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
 	int ret, pull_size;
-	struct qbman_fle *fle;
-	struct qdma_cntx_fle_sdd *fle_sdd;
-	struct qdma_cntx_sg *cntx_sg;
-	struct qdma_cntx_long *cntx_long;
 	uint16_t free_space = 0, fle_elem_nb = 0;
 
 	if (unlikely(qdma_dev->is_silent))
@@ -901,25 +1069,8 @@  dpaa2_qdma_dequeue(void *dev_private,
 				continue;
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
-		fle_sdd = (void *)(uintptr_t)DPAA2_GET_FD_FLC(fd);
-		fle = fle_sdd->fle;
-		qdma_vq->fle_elem[fle_elem_nb] = fle_sdd;
-		fle_elem_nb++;
-		if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
-			QBMAN_FLE_WORD4_FMT_SGE) {
-			cntx_sg = container_of(fle_sdd,
-				struct qdma_cntx_sg, fle_sdd);
-			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
-				cntx_sg->cntx_idx,
-				cntx_sg->job_nb, &free_space);
-		} else {
-			cntx_long = container_of(fle_sdd,
-				struct qdma_cntx_long, fle_sdd);
-			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
-				&cntx_long->cntx_idx,
-				1, &free_space);
-		}
-		if (!ret || free_space < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)
+		ret = dpaa2_qdma_dq_fd(fd, qdma_vq, &free_space, &fle_elem_nb);
+		if (ret || free_space < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)
 			pending = 0;
 
 		dq_storage++;
@@ -944,8 +1095,10 @@  dpaa2_qdma_dequeue(void *dev_private,
 	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
 	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	rte_mempool_put_bulk(qdma_vq->fle_pool,
-		qdma_vq->fle_elem, fle_elem_nb);
+	if (fle_elem_nb > 0) {
+		rte_mempool_put_bulk(qdma_vq->fle_pool,
+			qdma_vq->fle_elem, fle_elem_nb);
+	}
 
 	num_rx = qdma_cntx_idx_ring_dq(qdma_vq->ring_cntx_idx,
 		cntx_idx, nb_cpls);
@@ -1178,11 +1331,18 @@  dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	else
 		qdma_dev->vqs[vchan].flags &= (~DPAA2_QDMA_DESC_DEBUG_FLAG);
 
+	/** Default Enable Short FD for nonSG format.
+	 * Short FD has higher perf than FLE.
+	 */
+	env = getenv("DPAA2_QDMA_USING_SHORT_FD");
+	if (env)
+		qdma_dev->vqs[vchan].using_short_fd = atoi(env);
+	else
+		qdma_dev->vqs[vchan].using_short_fd = 1;
+
 	snprintf(pool_name, sizeof(pool_name),
 		"qdma_fle_pool_dev%d_qid%d", dpdmai_dev->dpdmai_id, vchan);
-	pool_size = RTE_MAX(sizeof(struct qdma_cntx_sg),
-			    sizeof(struct qdma_cntx_long));
-
+	pool_size = sizeof(struct qdma_cntx_sg);
 	qdma_dev->vqs[vchan].fle_pool = rte_mempool_create(pool_name,
 			DPAA2_QDMA_MAX_DESC * 2, pool_size,
 			512, 0, NULL, NULL, NULL, NULL,
@@ -1202,7 +1362,7 @@  dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 			return ret;
 		}
 		ret = rte_mempool_get_bulk(qdma_dev->vqs[vchan].fle_pool,
-				(void **)qdma_dev->vqs[vchan].cntx_long,
+				(void **)qdma_dev->vqs[vchan].cntx_fle_sdd,
 				DPAA2_QDMA_MAX_DESC);
 		if (ret) {
 			DPAA2_QDMA_ERR("long cntx get from %s for silent mode",
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index 371393cb85..0be65e1cc6 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -1,5 +1,5 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2023 NXP
+ * Copyright 2018-2024 NXP
  */
 
 #ifndef _DPAA2_QDMA_H_
@@ -12,17 +12,8 @@ 
 #define DPAA2_QDMA_MIN_DESC		1
 #define DPAA2_QDMA_MAX_VHANS		64
 
-#define DPAA2_QDMA_VQ_FD_SHORT_FORMAT		(1ULL << 0)
-#define DPAA2_QDMA_VQ_FD_SG_FORMAT		(1ULL << 1)
-#define DPAA2_QDMA_VQ_NO_RESPONSE		(1ULL << 2)
-
 #define DPAA2_DPDMAI_MAX_QUEUES	16
 
-#define QDMA_JOB_HW_CNTX_IDX (RTE_DPAA2_QDMA_JOB_USR_CNTX_IDX + 1)
-
-/** FLE pool cache size */
-#define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
-
 /** Notification by FQD_CTX[fqid] */
 #define QDMA_SER_CTX (1 << 8)
 #define DPAA2_RBP_MEM_RW            0x0
@@ -40,9 +31,14 @@ 
 #define DPAA2_LX2_COHERENT_ALLOCATE_CACHE	0xb
 
 /** Maximum possible H/W Queues on each core */
-#define MAX_HW_QUEUE_PER_CORE		64
+#define MAX_HW_QUEUE_PER_CORE 64
+
+#define DPAA2_QDMA_FD_FLUSH_FORMAT 0x0
+#define DPAA2_QDMA_FD_LONG_FORMAT 0x1
+#define DPAA2_QDMA_FD_SHORT_FORMAT 0x3
 
-#define QDMA_RBP_UPPER_ADDRESS_MASK (0xfff0000000000)
+#define DPAA2_QDMA_BMT_ENABLE 0x1
+#define DPAA2_QDMA_BMT_DISABLE 0x0
 
 /** Source/Destination Descriptor */
 struct qdma_sdd {
@@ -99,8 +95,8 @@  struct qdma_sdd {
 #define QDMA_SG_SL_SHORT	0x1 /* short length */
 #define QDMA_SG_SL_LONG	0x0 /* long length */
 #define QDMA_SG_F	0x1 /* last sg entry */
-#define QDMA_SG_BMT_ENABLE 0x1
-#define QDMA_SG_BMT_DISABLE 0x0
+#define QDMA_SG_BMT_ENABLE DPAA2_QDMA_BMT_ENABLE
+#define QDMA_SG_BMT_DISABLE DPAA2_QDMA_BMT_DISABLE
 
 struct qdma_sg_entry {
 	uint32_t addr_lo;		/* address 0:31 */
@@ -166,6 +162,40 @@  struct dpaa2_qdma_rbp {
 	uint32_t rsv:2;
 };
 
+enum dpaa2_qdma_fd_type {
+	DPAA2_QDMA_FD_SHORT = 1,
+	DPAA2_QDMA_FD_LONG = 2,
+	DPAA2_QDMA_FD_SG = 3
+};
+
+#define DPAA2_QDMA_FD_ATT_TYPE_OFFSET 13
+#define DPAA2_QDMA_FD_ATT_TYPE(att) \
+	(att >> DPAA2_QDMA_FD_ATT_TYPE_OFFSET)
+#define DPAA2_QDMA_FD_ATT_CNTX(att) \
+	(att & ((1 << DPAA2_QDMA_FD_ATT_TYPE_OFFSET) - 1))
+
+static inline void
+dpaa2_qdma_fd_set_addr(struct qbman_fd *fd,
+	uint64_t addr)
+{
+	fd->simple_ddr.saddr_lo = lower_32_bits(addr);
+	fd->simple_ddr.saddr_hi = upper_32_bits(addr);
+}
+
+static inline void
+dpaa2_qdma_fd_save_att(struct qbman_fd *fd,
+	uint16_t job_idx, enum dpaa2_qdma_fd_type type)
+{
+	fd->simple_ddr.rsv1_att = job_idx |
+		(type << DPAA2_QDMA_FD_ATT_TYPE_OFFSET);
+}
+
+static inline uint16_t
+dpaa2_qdma_fd_get_att(const struct qbman_fd *fd)
+{
+	return fd->simple_ddr.rsv1_att;
+}
+
 enum {
 	DPAA2_QDMA_SDD_FLE,
 	DPAA2_QDMA_SRC_FLE,
@@ -193,12 +223,6 @@  struct qdma_cntx_sg {
 	uint16_t rsv[3];
 } __rte_packed;
 
-struct qdma_cntx_long {
-	struct qdma_cntx_fle_sdd fle_sdd;
-	uint16_t cntx_idx;
-	uint16_t rsv[3];
-} __rte_packed;
-
 #define DPAA2_QDMA_IDXADDR_FROM_SG_FLAG(flag) \
 	((void *)(uintptr_t)((flag) - ((flag) & RTE_DPAA2_QDMA_SG_IDX_ADDR_MASK)))
 
@@ -241,6 +265,7 @@  struct qdma_virt_queue {
 	struct dpaa2_dpdmai_dev *dpdmai_dev;
 	/** FLE pool for the queue */
 	struct rte_mempool *fle_pool;
+	uint64_t fle_iova2va_offset;
 	void **fle_elem;
 	/** Route by port */
 	struct dpaa2_qdma_rbp rbp;
@@ -252,6 +277,7 @@  struct qdma_virt_queue {
 	uint64_t num_enqueues;
 	/* Total number of dequeues from this VQ */
 	uint64_t num_dequeues;
+	uint64_t copy_num;
 
 	uint16_t vq_id;
 	uint32_t flags;
@@ -261,10 +287,11 @@  struct qdma_virt_queue {
 
 	/**Used for silent enabled*/
 	struct qdma_cntx_sg *cntx_sg[DPAA2_QDMA_MAX_DESC];
-	struct qdma_cntx_long *cntx_long[DPAA2_QDMA_MAX_DESC];
+	struct qdma_cntx_fle_sdd *cntx_fle_sdd[DPAA2_QDMA_MAX_DESC];
 	uint16_t silent_idx;
 
 	int num_valid_jobs;
+	int using_short_fd;
 
 	struct rte_dma_stats stats;
 };
diff --git a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
index e49604c8fc..df21b39cae 100644
--- a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
@@ -7,19 +7,6 @@ 
 
 #include <rte_compat.h>
 
-#define RTE_DPAA2_QDMA_IDX_SHIFT_POS 20
-#define RTE_DPAA2_QDMA_LEN_MASK \
-	(~((~0u) << RTE_DPAA2_QDMA_IDX_SHIFT_POS))
-
-#define RTE_DPAA2_QDMA_IDX_LEN(idx, len) \
-	((uint32_t)((idx << RTE_DPAA2_QDMA_IDX_SHIFT_POS) | (len & RTE_DPAA2_QDMA_LEN_MASK)))
-
-#define RTE_DPAA2_QDMA_IDX_FROM_LENGTH(length) \
-	((uint16_t)((length) >> RTE_DPAA2_QDMA_IDX_SHIFT_POS))
-
-#define RTE_DPAA2_QDMA_LEN_FROM_LENGTH(length) \
-	((length) & RTE_DPAA2_QDMA_LEN_MASK)
-
 #define RTE_DPAA2_QDMA_COPY_IDX_OFFSET 8
 #define RTE_DPAA2_QDMA_SG_IDX_ADDR_ALIGN \
 	RTE_BIT64(RTE_DPAA2_QDMA_COPY_IDX_OFFSET)