get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/130869/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 130869,
    "url": "http://patchwork.dpdk.org/api/patches/130869/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20230830075655.8004-2-pbhagavatula@marvell.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20230830075655.8004-2-pbhagavatula@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20230830075655.8004-2-pbhagavatula@marvell.com",
    "date": "2023-08-30T07:56:55",
    "name": "[2/2] dma/cnxk: rewrite DMA fastpath",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "b499c5a60272f479781cdef3413b2ed519bf240b",
    "submitter": {
        "id": 1183,
        "url": "http://patchwork.dpdk.org/api/people/1183/?format=api",
        "name": "Pavan Nikhilesh Bhagavatula",
        "email": "pbhagavatula@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "http://patchwork.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20230830075655.8004-2-pbhagavatula@marvell.com/mbox/",
    "series": [
        {
            "id": 29366,
            "url": "http://patchwork.dpdk.org/api/series/29366/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=29366",
            "date": "2023-08-30T07:56:54",
            "name": "[1/2] dma/cnxk: use mempool for DMA chunk pool",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/29366/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/130869/comments/",
    "check": "fail",
    "checks": "http://patchwork.dpdk.org/api/patches/130869/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id C037B41F63;\n\tWed, 30 Aug 2023 09:57:12 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 257944028D;\n\tWed, 30 Aug 2023 09:57:07 +0200 (CEST)",
            "from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com\n [67.231.148.174])\n by mails.dpdk.org (Postfix) with ESMTP id B98A04027F\n for <dev@dpdk.org>; Wed, 30 Aug 2023 09:57:05 +0200 (CEST)",
            "from pps.filterd (m0045849.ppops.net [127.0.0.1])\n by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 37U6P7dd004364 for <dev@dpdk.org>; Wed, 30 Aug 2023 00:57:04 -0700",
            "from dc5-exch02.marvell.com ([199.233.59.182])\n by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3st0fyr8mn-1\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Wed, 30 Aug 2023 00:57:04 -0700",
            "from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.48;\n Wed, 30 Aug 2023 00:57:03 -0700",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com\n (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend\n Transport; Wed, 30 Aug 2023 00:57:02 -0700",
            "from MININT-80QBFE8.corp.innovium.com (MININT-80QBFE8.marvell.com\n [10.28.164.106])\n by maili.marvell.com (Postfix) with ESMTP id F062A3F7081;\n Wed, 30 Aug 2023 00:57:00 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=jNardA3EtQGp8g8o9ln3kvwdGHSHbefrUwK867QFJGw=;\n b=aBUOfM8hdxzJ+HK3shOqXO9to552F+19PMkWDm1LYq9Fy3n4U7E6FGwnLgoHIJBfMTR0\n Hw/BLS0+QavPnTo7uO/09koZLy66NjPujeK/UyZ+UhCkfml4m8udnK6IY6z9Ef6/2t1H\n O6Tdk61qt0fRqc2w+P4uDTAjmvHczNk9aHSRTnrcL25ClW46ueAdp4J8AEqCWUpyLOEs\n jth9RxQ49EiNx9lmsRBNc8V5FQKz4uKroKa0xIBkdA2Lnmz0w0F0kzvWDOpYbzuTR/Ef\n ky+YT2Q8dnZJ2FX09kMh9d2bAAuuOhnkXoEwr9blQ+dEk2An8nyHW0H4uZciMbzWEs6p Pw==",
        "From": "<pbhagavatula@marvell.com>",
        "To": "<jerinj@marvell.com>, Vamsi Attunuru <vattunuru@marvell.com>",
        "CC": "<dev@dpdk.org>, Pavan Nikhilesh <pbhagavatula@marvell.com>",
        "Subject": "[PATCH 2/2] dma/cnxk: rewrite DMA fastpath",
        "Date": "Wed, 30 Aug 2023 13:26:55 +0530",
        "Message-ID": "<20230830075655.8004-2-pbhagavatula@marvell.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20230830075655.8004-1-pbhagavatula@marvell.com>",
        "References": "<20230830075655.8004-1-pbhagavatula@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-GUID": "E2KMqYKMMOBNyYmJC2dx84d4t-K6zaYs",
        "X-Proofpoint-ORIG-GUID": "E2KMqYKMMOBNyYmJC2dx84d4t-K6zaYs",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.267,Aquarius:18.0.957,Hydra:6.0.601,FMLib:17.11.176.26\n definitions=2023-08-29_16,2023-08-29_01,2023-05-22_02",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "From: Pavan Nikhilesh <pbhagavatula@marvell.com>\n\nRewrite DMA fastpath to use NEON instructions and reduce number\nof words read from config.\n\nSigned-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>\n---\n drivers/dma/cnxk/cnxk_dmadev.c    | 454 +++--------------------------\n drivers/dma/cnxk/cnxk_dmadev.h    |  89 +++++-\n drivers/dma/cnxk/cnxk_dmadev_fp.c | 455 ++++++++++++++++++++++++++++++\n drivers/dma/cnxk/meson.build      |   2 +-\n 4 files changed, 570 insertions(+), 430 deletions(-)\n create mode 100644 drivers/dma/cnxk/cnxk_dmadev_fp.c",
    "diff": "diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c\nindex 35c2b79156..465290ce7a 100644\n--- a/drivers/dma/cnxk/cnxk_dmadev.c\n+++ b/drivers/dma/cnxk/cnxk_dmadev.c\n@@ -2,19 +2,6 @@\n  * Copyright (C) 2021 Marvell International Ltd.\n  */\n \n-#include <string.h>\n-#include <unistd.h>\n-\n-#include <bus_pci_driver.h>\n-#include <rte_common.h>\n-#include <rte_dmadev.h>\n-#include <rte_dmadev_pmd.h>\n-#include <rte_eal.h>\n-#include <rte_lcore.h>\n-#include <rte_mbuf_pool_ops.h>\n-#include <rte_mempool.h>\n-#include <rte_pci.h>\n-\n #include <cnxk_dmadev.h>\n \n static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);\n@@ -166,22 +153,9 @@ cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf,\n \treturn rc;\n }\n \n-static int\n-cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n-\t\t\tconst struct rte_dma_vchan_conf *conf, uint32_t conf_sz)\n+static void\n+cn9k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf)\n {\n-\tstruct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tuint16_t max_desc;\n-\tuint32_t size;\n-\tint i;\n-\n-\tRTE_SET_USED(conf_sz);\n-\n-\tif (dpivf->flag & CNXK_DPI_DEV_START)\n-\t\treturn 0;\n-\n \theader->cn9k.pt = DPI_HDR_PT_ZBW_CA;\n \n \tswitch (conf->direction) {\n@@ -217,57 +191,11 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n \t\theader->cn9k.fport = conf->dst_port.pcie.coreid;\n \t\theader->cn9k.pvfe = 0;\n \t};\n-\n-\t/* Free up descriptor memory before allocating. */\n-\tcnxk_dmadev_vchan_free(dpivf, vchan);\n-\n-\tmax_desc = conf->nb_desc;\n-\tif (!rte_is_power_of_2(max_desc))\n-\t\tmax_desc = rte_align32pow2(max_desc);\n-\n-\tif (max_desc > DPI_MAX_DESC)\n-\t\tmax_desc = DPI_MAX_DESC;\n-\n-\tsize = (max_desc * sizeof(struct cnxk_dpi_compl_s *));\n-\tdpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);\n-\n-\tif (dpi_conf->c_desc.compl_ptr == NULL) {\n-\t\tplt_err(\"Failed to allocate for comp_data\");\n-\t\treturn -ENOMEM;\n-\t}\n-\n-\tfor (i = 0; i < max_desc; i++) {\n-\t\tdpi_conf->c_desc.compl_ptr[i] =\n-\t\t\trte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);\n-\t\tif (!dpi_conf->c_desc.compl_ptr[i]) {\n-\t\t\tplt_err(\"Failed to allocate for descriptor memory\");\n-\t\t\treturn -ENOMEM;\n-\t\t}\n-\n-\t\tdpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;\n-\t}\n-\n-\tdpi_conf->c_desc.max_cnt = (max_desc - 1);\n-\n-\treturn 0;\n }\n \n-static int\n-cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n-\t\t\t const struct rte_dma_vchan_conf *conf, uint32_t conf_sz)\n+static void\n+cn10k_dmadev_setup_hdr(union cnxk_dpi_instr_cmd *header, const struct rte_dma_vchan_conf *conf)\n {\n-\tstruct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tuint16_t max_desc;\n-\tuint32_t size;\n-\tint i;\n-\n-\tRTE_SET_USED(conf_sz);\n-\n-\tif (dpivf->flag & CNXK_DPI_DEV_START)\n-\t\treturn 0;\n-\n \theader->cn10k.pt = DPI_HDR_PT_ZBW_CA;\n \n \tswitch (conf->direction) {\n@@ -303,6 +231,29 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n \t\theader->cn10k.fport = conf->dst_port.pcie.coreid;\n \t\theader->cn10k.pvfe = 0;\n \t};\n+}\n+\n+static int\n+cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n+\t\t\tconst struct rte_dma_vchan_conf *conf, uint32_t conf_sz)\n+{\n+\tstruct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;\n+\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n+\tunion cnxk_dpi_instr_cmd *header;\n+\tuint16_t max_desc;\n+\tuint32_t size;\n+\tint i;\n+\n+\tRTE_SET_USED(conf_sz);\n+\n+\theader = (union cnxk_dpi_instr_cmd *)&dpi_conf->cmd.u;\n+\tif (dpivf->flag & CNXK_DPI_DEV_START)\n+\t\treturn 0;\n+\n+\tif (dpivf->is_cn10k)\n+\t\tcn10k_dmadev_setup_hdr(header, conf);\n+\telse\n+\t\tcn9k_dmadev_setup_hdr(header, conf);\n \n \t/* Free up descriptor memory before allocating. */\n \tcnxk_dmadev_vchan_free(dpivf, vchan);\n@@ -329,6 +280,7 @@ cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,\n \t\t\tplt_err(\"Failed to allocate for descriptor memory\");\n \t\t\treturn -ENOMEM;\n \t\t}\n+\n \t\tdpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;\n \t}\n \n@@ -374,6 +326,11 @@ static int\n cnxk_dmadev_stop(struct rte_dma_dev *dev)\n {\n \tstruct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;\n+\tuint64_t reg;\n+\n+\treg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR);\n+\twhile (!(reg & BIT_ULL(63)))\n+\t\treg = plt_read64(dpivf->rdpi.rbase + DPI_VDMA_SADDR);\n \n \troc_dpi_disable(&dpivf->rdpi);\n \tdpivf->flag &= ~CNXK_DPI_DEV_START;\n@@ -396,332 +353,6 @@ cnxk_dmadev_close(struct rte_dma_dev *dev)\n \treturn 0;\n }\n \n-static inline int\n-__dpi_queue_write(struct cnxk_dpi_vf_s *dpi, uint64_t *cmds, int cmd_count)\n-{\n-\tuint64_t *ptr = dpi->chunk_base;\n-\n-\tif ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) || cmds == NULL)\n-\t\treturn -EINVAL;\n-\n-\t/*\n-\t * Normally there is plenty of room in the current buffer for the\n-\t * command\n-\t */\n-\tif (dpi->chunk_head + cmd_count < dpi->chunk_size_m1) {\n-\t\tptr += dpi->chunk_head;\n-\t\tdpi->chunk_head += cmd_count;\n-\t\twhile (cmd_count--)\n-\t\t\t*ptr++ = *cmds++;\n-\t} else {\n-\t\tuint64_t *new_buff = NULL;\n-\t\tint count;\n-\n-\t\tif (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {\n-\t\t\tplt_dpi_dbg(\"Failed to alloc next buffer from NPA\");\n-\t\t\treturn -ENOMEM;\n-\t\t}\n-\n-\t\t/*\n-\t\t * Figure out how many cmd words will fit in this buffer.\n-\t\t * One location will be needed for the next buffer pointer.\n-\t\t */\n-\t\tcount = dpi->chunk_size_m1 - dpi->chunk_head;\n-\t\tptr += dpi->chunk_head;\n-\t\tcmd_count -= count;\n-\t\twhile (count--)\n-\t\t\t*ptr++ = *cmds++;\n-\n-\t\t/*\n-\t\t * chunk next ptr is 2 DWORDS\n-\t\t * second DWORD is reserved.\n-\t\t */\n-\t\t*ptr++ = (uint64_t)new_buff;\n-\t\t*ptr = 0;\n-\n-\t\t/*\n-\t\t * The current buffer is full and has a link to the next\n-\t\t * buffers. Time to write the rest of the commands into the new\n-\t\t * buffer.\n-\t\t */\n-\t\tdpi->chunk_base = new_buff;\n-\t\tdpi->chunk_head = cmd_count;\n-\t\tptr = new_buff;\n-\t\twhile (cmd_count--)\n-\t\t\t*ptr++ = *cmds++;\n-\n-\t\t/* queue index may be greater than pool size */\n-\t\tif (dpi->chunk_head == dpi->chunk_size_m1) {\n-\t\t\tif (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {\n-\t\t\t\tplt_dpi_dbg(\"Failed to alloc next buffer from NPA\");\n-\t\t\t\treturn -ENOMEM;\n-\t\t\t}\n-\t\t\t/* Write next buffer address */\n-\t\t\t*ptr = (uint64_t)new_buff;\n-\t\t\tdpi->chunk_base = new_buff;\n-\t\t\tdpi->chunk_head = 0;\n-\t\t}\n-\t}\n-\n-\treturn 0;\n-}\n-\n-static int\n-cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length,\n-\t\t uint64_t flags)\n-{\n-\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tstruct cnxk_dpi_compl_s *comp_ptr;\n-\tuint64_t cmd[DPI_MAX_CMD_SIZE];\n-\trte_iova_t fptr, lptr;\n-\tint num_words = 0;\n-\tint rc;\n-\n-\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n-\theader->cn9k.ptr = (uint64_t)comp_ptr;\n-\tSTRM_INC(dpi_conf->c_desc, tail);\n-\n-\tif (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn -ENOSPC;\n-\t}\n-\n-\theader->cn9k.nfst = 1;\n-\theader->cn9k.nlst = 1;\n-\n-\t/*\n-\t * For inbound case, src pointers are last pointers.\n-\t * For all other cases, src pointers are first pointers.\n-\t */\n-\tif (header->cn9k.xtype == DPI_XTYPE_INBOUND) {\n-\t\tfptr = dst;\n-\t\tlptr = src;\n-\t} else {\n-\t\tfptr = src;\n-\t\tlptr = dst;\n-\t}\n-\n-\tcmd[0] = header->u[0];\n-\tcmd[1] = header->u[1];\n-\tcmd[2] = header->u[2];\n-\t/* word3 is always 0 */\n-\tnum_words += 4;\n-\tcmd[num_words++] = length;\n-\tcmd[num_words++] = fptr;\n-\tcmd[num_words++] = length;\n-\tcmd[num_words++] = lptr;\n-\n-\trc = __dpi_queue_write(dpivf, cmd, num_words);\n-\tif (unlikely(rc)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn rc;\n-\t}\n-\n-\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n-\t\trte_wmb();\n-\t\tplt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n-\t\tdpi_conf->stats.submitted++;\n-\t} else {\n-\t\tdpi_conf->pnum_words += num_words;\n-\t\tdpi_conf->pending++;\n-\t}\n-\n-\treturn dpi_conf->desc_idx++;\n-}\n-\n-static int\n-cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n-\t\t    const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags)\n-{\n-\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tconst struct rte_dma_sge *fptr, *lptr;\n-\tstruct cnxk_dpi_compl_s *comp_ptr;\n-\tuint64_t cmd[DPI_MAX_CMD_SIZE];\n-\tint num_words = 0;\n-\tint i, rc;\n-\n-\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n-\theader->cn9k.ptr = (uint64_t)comp_ptr;\n-\tSTRM_INC(dpi_conf->c_desc, tail);\n-\n-\tif (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn -ENOSPC;\n-\t}\n-\n-\t/*\n-\t * For inbound case, src pointers are last pointers.\n-\t * For all other cases, src pointers are first pointers.\n-\t */\n-\tif (header->cn9k.xtype == DPI_XTYPE_INBOUND) {\n-\t\theader->cn9k.nfst = nb_dst & DPI_MAX_POINTER;\n-\t\theader->cn9k.nlst = nb_src & DPI_MAX_POINTER;\n-\t\tfptr = &dst[0];\n-\t\tlptr = &src[0];\n-\t} else {\n-\t\theader->cn9k.nfst = nb_src & DPI_MAX_POINTER;\n-\t\theader->cn9k.nlst = nb_dst & DPI_MAX_POINTER;\n-\t\tfptr = &src[0];\n-\t\tlptr = &dst[0];\n-\t}\n-\n-\tcmd[0] = header->u[0];\n-\tcmd[1] = header->u[1];\n-\tcmd[2] = header->u[2];\n-\tnum_words += 4;\n-\tfor (i = 0; i < header->cn9k.nfst; i++) {\n-\t\tcmd[num_words++] = (uint64_t)fptr->length;\n-\t\tcmd[num_words++] = fptr->addr;\n-\t\tfptr++;\n-\t}\n-\n-\tfor (i = 0; i < header->cn9k.nlst; i++) {\n-\t\tcmd[num_words++] = (uint64_t)lptr->length;\n-\t\tcmd[num_words++] = lptr->addr;\n-\t\tlptr++;\n-\t}\n-\n-\trc = __dpi_queue_write(dpivf, cmd, num_words);\n-\tif (unlikely(rc)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn rc;\n-\t}\n-\n-\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n-\t\trte_wmb();\n-\t\tplt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n-\t\tdpi_conf->stats.submitted++;\n-\t} else {\n-\t\tdpi_conf->pnum_words += num_words;\n-\t\tdpi_conf->pending++;\n-\t}\n-\n-\treturn dpi_conf->desc_idx++;\n-}\n-\n-static int\n-cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,\n-\t\t  uint32_t length, uint64_t flags)\n-{\n-\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tstruct cnxk_dpi_compl_s *comp_ptr;\n-\tuint64_t cmd[DPI_MAX_CMD_SIZE];\n-\trte_iova_t fptr, lptr;\n-\tint num_words = 0;\n-\tint rc;\n-\n-\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n-\theader->cn10k.ptr = (uint64_t)comp_ptr;\n-\tSTRM_INC(dpi_conf->c_desc, tail);\n-\n-\tif (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn -ENOSPC;\n-\t}\n-\n-\theader->cn10k.nfst = 1;\n-\theader->cn10k.nlst = 1;\n-\n-\tfptr = src;\n-\tlptr = dst;\n-\n-\tcmd[0] = header->u[0];\n-\tcmd[1] = header->u[1];\n-\tcmd[2] = header->u[2];\n-\t/* word3 is always 0 */\n-\tnum_words += 4;\n-\tcmd[num_words++] = length;\n-\tcmd[num_words++] = fptr;\n-\tcmd[num_words++] = length;\n-\tcmd[num_words++] = lptr;\n-\n-\trc = __dpi_queue_write(dpivf, cmd, num_words);\n-\tif (unlikely(rc)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn rc;\n-\t}\n-\n-\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n-\t\trte_wmb();\n-\t\tplt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n-\t\tdpi_conf->stats.submitted++;\n-\t} else {\n-\t\tdpi_conf->pnum_words += num_words;\n-\t\tdpi_conf->pending++;\n-\t}\n-\n-\treturn dpi_conf->desc_idx++;\n-}\n-\n-static int\n-cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n-\t\t     const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,\n-\t\t     uint64_t flags)\n-{\n-\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n-\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n-\tunion dpi_instr_hdr_s *header = &dpi_conf->hdr;\n-\tconst struct rte_dma_sge *fptr, *lptr;\n-\tstruct cnxk_dpi_compl_s *comp_ptr;\n-\tuint64_t cmd[DPI_MAX_CMD_SIZE];\n-\tint num_words = 0;\n-\tint i, rc;\n-\n-\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n-\theader->cn10k.ptr = (uint64_t)comp_ptr;\n-\tSTRM_INC(dpi_conf->c_desc, tail);\n-\n-\tif (unlikely(dpi_conf->c_desc.tail == dpi_conf->c_desc.head)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn -ENOSPC;\n-\t}\n-\n-\theader->cn10k.nfst = nb_src & DPI_MAX_POINTER;\n-\theader->cn10k.nlst = nb_dst & DPI_MAX_POINTER;\n-\tfptr = &src[0];\n-\tlptr = &dst[0];\n-\n-\tcmd[0] = header->u[0];\n-\tcmd[1] = header->u[1];\n-\tcmd[2] = header->u[2];\n-\tnum_words += 4;\n-\n-\tfor (i = 0; i < header->cn10k.nfst; i++) {\n-\t\tcmd[num_words++] = (uint64_t)fptr->length;\n-\t\tcmd[num_words++] = fptr->addr;\n-\t\tfptr++;\n-\t}\n-\n-\tfor (i = 0; i < header->cn10k.nlst; i++) {\n-\t\tcmd[num_words++] = (uint64_t)lptr->length;\n-\t\tcmd[num_words++] = lptr->addr;\n-\t\tlptr++;\n-\t}\n-\n-\trc = __dpi_queue_write(dpivf, cmd, num_words);\n-\tif (unlikely(rc)) {\n-\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n-\t\treturn rc;\n-\t}\n-\n-\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n-\t\trte_wmb();\n-\t\tplt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n-\t\tdpi_conf->stats.submitted++;\n-\t} else {\n-\t\tdpi_conf->pnum_words += num_words;\n-\t\tdpi_conf->pending++;\n-\t}\n-\n-\treturn dpi_conf->desc_idx++;\n-}\n-\n static uint16_t\n cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, uint16_t *last_idx,\n \t\t      bool *has_error)\n@@ -880,17 +511,6 @@ cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)\n \treturn 0;\n }\n \n-static const struct rte_dma_dev_ops cn10k_dmadev_ops = {\n-\t.dev_close = cnxk_dmadev_close,\n-\t.dev_configure = cnxk_dmadev_configure,\n-\t.dev_info_get = cnxk_dmadev_info_get,\n-\t.dev_start = cnxk_dmadev_start,\n-\t.dev_stop = cnxk_dmadev_stop,\n-\t.stats_get = cnxk_stats_get,\n-\t.stats_reset = cnxk_stats_reset,\n-\t.vchan_setup = cn10k_dmadev_vchan_setup,\n-};\n-\n static const struct rte_dma_dev_ops cnxk_dmadev_ops = {\n \t.dev_close = cnxk_dmadev_close,\n \t.dev_configure = cnxk_dmadev_configure,\n@@ -941,12 +561,8 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de\n \tdmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;\n \tdmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity;\n \n-\tif (pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KA ||\n-\t    pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KAS ||\n-\t    pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KA ||\n-\t    pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KB ||\n-\t    pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KB) {\n-\t\tdmadev->dev_ops = &cn10k_dmadev_ops;\n+\tif (roc_model_is_cn10k()) {\n+\t\tdpivf->is_cn10k = true;\n \t\tdmadev->fp_obj->copy = cn10k_dmadev_copy;\n \t\tdmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;\n \t}\ndiff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h\nindex 65f12d844d..c9032de779 100644\n--- a/drivers/dma/cnxk/cnxk_dmadev.h\n+++ b/drivers/dma/cnxk/cnxk_dmadev.h\n@@ -4,14 +4,27 @@\n #ifndef CNXK_DMADEV_H\n #define CNXK_DMADEV_H\n \n+#include <string.h>\n+#include <unistd.h>\n+\n+#include <bus_pci_driver.h>\n+#include <rte_common.h>\n+#include <rte_dmadev.h>\n+#include <rte_dmadev_pmd.h>\n+#include <rte_eal.h>\n+#include <rte_lcore.h>\n+#include <rte_mbuf_pool_ops.h>\n+#include <rte_mempool.h>\n+#include <rte_pci.h>\n+\n #include <roc_api.h>\n \n-#define DPI_MAX_POINTER\t     15\n-#define STRM_INC(s, var)     ((s).var = ((s).var + 1) & (s).max_cnt)\n-#define STRM_DEC(s, var)     ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1))\n-#define DPI_MAX_DESC\t     2048\n-#define DPI_MIN_DESC\t     2\n-#define MAX_VCHANS_PER_QUEUE 4\n+#define DPI_MAX_POINTER\t       15\n+#define STRM_INC(s, var)       ((s).var = ((s).var + 1) & (s).max_cnt)\n+#define STRM_DEC(s, var)       ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1))\n+#define DPI_MAX_DESC\t       2048\n+#define DPI_MIN_DESC\t       2\n+#define MAX_VCHANS_PER_QUEUE   4\n #define DPI_CMD_QUEUE_BUF_SIZE 4096\n #define DPI_CMD_QUEUE_BUFS     1024\n \n@@ -21,8 +34,51 @@\n #define DPI_REQ_CDATA 0xFF\n \n #define CNXK_DMA_POOL_MAX_CACHE_SZ (16)\n-#define CNXK_DPI_DEV_CONFIG (1ULL << 0)\n-#define CNXK_DPI_DEV_START  (1ULL << 1)\n+#define CNXK_DPI_DEV_CONFIG\t   (1ULL << 0)\n+#define CNXK_DPI_DEV_START\t   (1ULL << 1)\n+\n+union cnxk_dpi_instr_cmd {\n+\tuint64_t u;\n+\tstruct cn9k_dpi_instr_cmd {\n+\t\tuint64_t aura : 20;\n+\t\tuint64_t func : 16;\n+\t\tuint64_t pt : 2;\n+\t\tuint64_t reserved_102 : 1;\n+\t\tuint64_t pvfe : 1;\n+\t\tuint64_t fl : 1;\n+\t\tuint64_t ii : 1;\n+\t\tuint64_t fi : 1;\n+\t\tuint64_t ca : 1;\n+\t\tuint64_t csel : 1;\n+\t\tuint64_t reserved_109_111 : 3;\n+\t\tuint64_t xtype : 2;\n+\t\tuint64_t reserved_114_119 : 6;\n+\t\tuint64_t fport : 2;\n+\t\tuint64_t reserved_122_123 : 2;\n+\t\tuint64_t lport : 2;\n+\t\tuint64_t reserved_126_127 : 2;\n+\t\t/* Word 1 - End */\n+\t} cn9k;\n+\n+\tstruct cn10k_dpi_instr_cmd {\n+\t\tuint64_t nfst : 4;\n+\t\tuint64_t reserved_4_5 : 2;\n+\t\tuint64_t nlst : 4;\n+\t\tuint64_t reserved_10_11 : 2;\n+\t\tuint64_t pvfe : 1;\n+\t\tuint64_t reserved_13 : 1;\n+\t\tuint64_t func : 16;\n+\t\tuint64_t aura : 20;\n+\t\tuint64_t xtype : 2;\n+\t\tuint64_t reserved_52_53 : 2;\n+\t\tuint64_t pt : 2;\n+\t\tuint64_t fport : 2;\n+\t\tuint64_t reserved_58_59 : 2;\n+\t\tuint64_t lport : 2;\n+\t\tuint64_t reserved_62_63 : 2;\n+\t\t/* Word 0 - End */\n+\t} cn10k;\n+};\n \n struct cnxk_dpi_compl_s {\n \tuint64_t cdata;\n@@ -37,26 +93,39 @@ struct cnxk_dpi_cdesc_data_s {\n };\n \n struct cnxk_dpi_conf {\n-\tunion dpi_instr_hdr_s hdr;\n+\tunion cnxk_dpi_instr_cmd cmd;\n \tstruct cnxk_dpi_cdesc_data_s c_desc;\n \tuint16_t pnum_words;\n \tuint16_t pending;\n \tuint16_t desc_idx;\n-\tuint16_t pad0;\n \tstruct rte_dma_stats stats;\n \tuint64_t completed_offset;\n };\n \n struct cnxk_dpi_vf_s {\n+\t/* Fast path*/\n \tuint64_t *chunk_base;\n \tuint16_t chunk_head;\n \tuint16_t chunk_size_m1;\n \tstruct rte_mempool *chunk_pool;\n \tstruct cnxk_dpi_conf conf[MAX_VCHANS_PER_QUEUE];\n+\t/* Slow path */\n \tstruct roc_dpi rdpi;\n \tuint32_t aura;\n \tuint16_t num_vchans;\n \tuint16_t flag;\n+\tuint8_t is_cn10k;\n } __plt_cache_aligned;\n \n+int cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,\n+\t\t     uint32_t length, uint64_t flags);\n+int cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n+\t\t\tconst struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,\n+\t\t\tuint64_t flags);\n+int cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,\n+\t\t      uint32_t length, uint64_t flags);\n+int cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n+\t\t\t const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,\n+\t\t\t uint64_t flags);\n+\n #endif\ndiff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c\nnew file mode 100644\nindex 0000000000..db1e57bf51\n--- /dev/null\n+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c\n@@ -0,0 +1,455 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright (C) 2021 Marvell International Ltd.\n+ */\n+\n+#include <rte_vect.h>\n+\n+#include \"cnxk_dmadev.h\"\n+\n+#define DMA_DW_PER_SINGLE_CMD 8\n+#define DMA_HDR_LEN\t      4\n+#define DMA_CMD_LEN(src, dst) (DMA_HDR_LEN + (src << 1) + (dst << 1))\n+\n+static __plt_always_inline void\n+__dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n)\n+{\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n; i++)\n+\t\tdst[i] = src[i];\n+}\n+\n+static __plt_always_inline void\n+__dpi_cpy_scalar_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n)\n+{\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n; i++) {\n+\t\t*dst++ = src[i].length;\n+\t\t*dst++ = src[i].addr;\n+\t}\n+}\n+\n+static __plt_always_inline uint8_t\n+__dpi_cpy_scalar_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt)\n+{\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n && lmt; i++) {\n+\t\t*dst++ = src[i].length;\n+\t\t*dst++ = src[i].addr;\n+\t\tlmt -= 2;\n+\t}\n+\n+\treturn i;\n+}\n+\n+#if defined(RTE_ARCH_ARM64)\n+static __plt_always_inline void\n+__dpi_cpy_vector(uint64_t *src, uint64_t *dst, uint8_t n)\n+{\n+\tuint64x2_t vec;\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n; i += 2) {\n+\t\tvec = vld1q_u64((const uint64_t *)&src[i]);\n+\t\tvst1q_u64(&dst[i], vec);\n+\t}\n+}\n+\n+static __plt_always_inline void\n+__dpi_cpy_vector_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n)\n+{\n+\tuint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};\n+\tuint64x2_t vec;\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n; i++) {\n+\t\tvec = vld1q_u64((const uint64_t *)&src[i]);\n+\t\tvec = vextq_u64(vec, vec, 1);\n+\t\tvec = vandq_u64(vec, mask);\n+\t\tvst1q_u64(dst, vec);\n+\t\tdst += 2;\n+\t}\n+}\n+\n+static __plt_always_inline uint8_t\n+__dpi_cpy_vector_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt)\n+{\n+\tuint64x2_t mask = {0xFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};\n+\tuint64x2_t vec;\n+\tuint8_t i;\n+\n+\tfor (i = 0; i < n && lmt; i++) {\n+\t\tvec = vld1q_u64((const uint64_t *)&src[i]);\n+\t\tvec = vextq_u64(vec, vec, 1);\n+\t\tvec = vandq_u64(vec, mask);\n+\t\tvst1q_u64(dst, vec);\n+\t\tdst += 2;\n+\t\tlmt -= 2;\n+\t}\n+\n+\treturn i;\n+}\n+#endif\n+\n+static __plt_always_inline void\n+__dpi_cpy(uint64_t *src, uint64_t *dst, uint8_t n)\n+{\n+#if defined(RTE_ARCH_ARM64)\n+\t__dpi_cpy_vector(src, dst, n);\n+#else\n+\t__dpi_cpy_scalar(src, dst, n);\n+#endif\n+}\n+\n+static __plt_always_inline void\n+__dpi_cpy_sg(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n)\n+{\n+#if defined(RTE_ARCH_ARM64)\n+\t__dpi_cpy_vector_sg(src, dst, n);\n+#else\n+\t__dpi_cpy_scalar_sg(src, dst, n);\n+#endif\n+}\n+\n+static __plt_always_inline uint8_t\n+__dpi_cpy_sg_lmt(const struct rte_dma_sge *src, uint64_t *dst, uint16_t n, uint16_t lmt)\n+{\n+#if defined(RTE_ARCH_ARM64)\n+\treturn __dpi_cpy_vector_sg_lmt(src, dst, n, lmt);\n+#else\n+\treturn __dpi_cpy_scalar_sg_lmt(src, dst, n, lmt);\n+#endif\n+}\n+\n+static __plt_always_inline int\n+__dpi_queue_write_single(struct cnxk_dpi_vf_s *dpi, uint64_t *cmd)\n+{\n+\tuint64_t *ptr = dpi->chunk_base;\n+\n+\t/*\n+\t * Normally there is plenty of room in the current buffer for the\n+\t * command\n+\t */\n+\tif (dpi->chunk_head + DMA_DW_PER_SINGLE_CMD < dpi->chunk_size_m1) {\n+\t\tptr += dpi->chunk_head;\n+\n+\t\t__dpi_cpy_scalar(cmd, ptr, DMA_DW_PER_SINGLE_CMD);\n+\t\tdpi->chunk_head += DMA_DW_PER_SINGLE_CMD;\n+\t} else {\n+\t\tuint64_t *new_buff = NULL;\n+\t\tint count;\n+\n+\t\tif (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {\n+\t\t\tplt_dpi_dbg(\"Failed to alloc next buffer from NPA\");\n+\t\t\treturn -ENOSPC;\n+\t\t}\n+\n+\t\t/*\n+\t\t * Figure out how many cmd words will fit in this buffer.\n+\t\t * One location will be needed for the next buffer pointer.\n+\t\t */\n+\t\tcount = dpi->chunk_size_m1 - dpi->chunk_head;\n+\t\tptr += dpi->chunk_head;\n+\n+\t\t__dpi_cpy_scalar(cmd, ptr, count);\n+\n+\t\tptr += count;\n+\t\t*ptr = (uint64_t)new_buff;\n+\t\tptr = new_buff;\n+\n+\t\t__dpi_cpy_scalar(cmd + count, ptr, DMA_DW_PER_SINGLE_CMD - count);\n+\n+\t\t/*\n+\t\t * The current buffer is full and has a link to the next\n+\t\t * buffers. Time to write the rest of the commands into\n+\t\t * the new buffer.\n+\t\t */\n+\t\tdpi->chunk_base = new_buff;\n+\t\tdpi->chunk_head = DMA_DW_PER_SINGLE_CMD - count;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static __plt_always_inline int\n+__dpi_queue_write_sg(struct cnxk_dpi_vf_s *dpi, uint64_t *hdr, const struct rte_dma_sge *src,\n+\t\t     const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst)\n+{\n+\tuint8_t cmd_len = DMA_CMD_LEN(nb_src, nb_dst);\n+\tuint64_t *ptr = dpi->chunk_base;\n+\n+\t/*\n+\t * Normally there is plenty of room in the current buffer for the\n+\t * command\n+\t */\n+\tif (dpi->chunk_head + cmd_len < dpi->chunk_size_m1) {\n+\t\tptr += dpi->chunk_head;\n+\n+\t\t__dpi_cpy(hdr, ptr, DMA_HDR_LEN);\n+\t\tptr += DMA_HDR_LEN;\n+\t\t__dpi_cpy_sg(src, ptr, nb_src);\n+\t\tptr += (nb_src << 1);\n+\t\t__dpi_cpy_sg(dst, ptr, nb_dst);\n+\n+\t\tdpi->chunk_head += cmd_len;\n+\t} else {\n+\t\tuint64_t *new_buff = NULL, *buf;\n+\t\tuint16_t count;\n+\n+\t\tif (rte_mempool_get(dpi->chunk_pool, (void **)&new_buff) < 0) {\n+\t\t\tplt_dpi_dbg(\"Failed to alloc next buffer from NPA\");\n+\t\t\treturn -ENOSPC;\n+\t\t}\n+\n+\t\t/*\n+\t\t * Figure out how many cmd words will fit in this buffer.\n+\t\t * One location will be needed for the next buffer pointer.\n+\t\t */\n+\t\tcount = dpi->chunk_size_m1 - dpi->chunk_head;\n+\t\tptr += dpi->chunk_head;\n+\t\tbuf = new_buff;\n+\t\tif (count <= 4) {\n+\t\t\t__dpi_cpy(hdr, ptr, count);\n+\t\t\tptr += count;\n+\t\t\t__dpi_cpy(&hdr[count], buf, 4);\n+\t\t\tbuf += (4 - count);\n+\t\t} else {\n+\t\t\tuint8_t i;\n+\n+\t\t\t__dpi_cpy(hdr, ptr, 4);\n+\t\t\tptr += 4;\n+\t\t\tcount -= 4;\n+\n+\t\t\ti = __dpi_cpy_sg_lmt(src, ptr, nb_src, count);\n+\t\t\tsrc += i;\n+\t\t\tnb_src -= i;\n+\t\t\tcount -= (i << 1);\n+\t\t\tptr += (i << 1);\n+\n+\t\t\ti = __dpi_cpy_sg_lmt(dst, ptr, nb_dst, count);\n+\t\t\tdst += i;\n+\t\t\tnb_dst -= i;\n+\t\t\tptr += (i << 1);\n+\t\t}\n+\t\t*ptr = (uint64_t)new_buff;\n+\n+\t\t__dpi_cpy_sg(src, buf, nb_src);\n+\t\tbuf += (nb_src << 1);\n+\n+\t\t__dpi_cpy_sg(dst, buf, nb_dst);\n+\t\tbuf += (nb_dst << 1);\n+\n+\t\t/*\n+\t\t * The current buffer is full and has a link to the next\n+\t\t * buffers. Time to write the rest of the commands into\n+\t\t * the new buffer.\n+\t\t */\n+\t\tdpi->chunk_base = new_buff;\n+\t\tdpi->chunk_head = buf - new_buff;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+int\n+cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length,\n+\t\t uint64_t flags)\n+{\n+\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n+\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n+\tuint64_t cmd[DMA_DW_PER_SINGLE_CMD];\n+\tstruct cnxk_dpi_compl_s *comp_ptr;\n+\tint rc;\n+\n+\tif (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==\n+\t\t     dpi_conf->c_desc.head))\n+\t\treturn -ENOSPC;\n+\n+\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n+\tSTRM_INC(dpi_conf->c_desc, tail);\n+\n+\tcmd[0] = (1UL << 54) | (1UL << 48);\n+\tcmd[1] = dpi_conf->cmd.u;\n+\tcmd[2] = (uint64_t)comp_ptr;\n+\tcmd[4] = length;\n+\tcmd[6] = length;\n+\n+\t/*\n+\t * For inbound case, src pointers are last pointers.\n+\t * For all other cases, src pointers are first pointers.\n+\t */\n+\tif (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {\n+\t\tcmd[5] = dst;\n+\t\tcmd[7] = src;\n+\t} else {\n+\t\tcmd[5] = src;\n+\t\tcmd[7] = dst;\n+\t}\n+\n+\trc = __dpi_queue_write_single(dpivf, cmd);\n+\tif (unlikely(rc)) {\n+\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n+\t\treturn rc;\n+\t}\n+\n+\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n+\t\trte_wmb();\n+\t\tplt_write64(dpi_conf->pnum_words + DMA_DW_PER_SINGLE_CMD,\n+\t\t\t    dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n+\t\tdpi_conf->stats.submitted += dpi_conf->pending + 1;\n+\t\tdpi_conf->pnum_words = 0;\n+\t\tdpi_conf->pending = 0;\n+\t} else {\n+\t\tdpi_conf->pnum_words += DMA_DW_PER_SINGLE_CMD;\n+\t\tdpi_conf->pending++;\n+\t}\n+\n+\treturn dpi_conf->desc_idx++;\n+}\n+\n+int\n+cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n+\t\t    const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags)\n+{\n+\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n+\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n+\tconst struct rte_dma_sge *fptr, *lptr;\n+\tstruct cnxk_dpi_compl_s *comp_ptr;\n+\tuint64_t hdr[4];\n+\tint rc;\n+\n+\tif (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==\n+\t\t     dpi_conf->c_desc.head))\n+\t\treturn -ENOSPC;\n+\n+\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n+\tSTRM_INC(dpi_conf->c_desc, tail);\n+\n+\thdr[1] = dpi_conf->cmd.u;\n+\thdr[2] = (uint64_t)comp_ptr;\n+\n+\t/*\n+\t * For inbound case, src pointers are last pointers.\n+\t * For all other cases, src pointers are first pointers.\n+\t */\n+\tif (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {\n+\t\tfptr = dst;\n+\t\tlptr = src;\n+\t\tRTE_SWAP(nb_src, nb_dst);\n+\t} else {\n+\t\tfptr = src;\n+\t\tlptr = dst;\n+\t}\n+\thdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;\n+\n+\trc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);\n+\tif (unlikely(rc)) {\n+\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n+\t\treturn rc;\n+\t}\n+\n+\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n+\t\trte_wmb();\n+\t\tplt_write64(dpi_conf->pnum_words + DMA_CMD_LEN(nb_src, nb_dst),\n+\t\t\t    dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n+\t\tdpi_conf->stats.submitted += dpi_conf->pending + 1;\n+\t\tdpi_conf->pnum_words = 0;\n+\t\tdpi_conf->pending = 0;\n+\t} else {\n+\t\tdpi_conf->pnum_words += DMA_CMD_LEN(nb_src, nb_dst);\n+\t\tdpi_conf->pending++;\n+\t}\n+\n+\treturn dpi_conf->desc_idx++;\n+}\n+\n+int\n+cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,\n+\t\t  uint32_t length, uint64_t flags)\n+{\n+\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n+\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n+\tuint64_t cmd[DMA_DW_PER_SINGLE_CMD];\n+\tstruct cnxk_dpi_compl_s *comp_ptr;\n+\tint rc;\n+\n+\tif (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==\n+\t\t     dpi_conf->c_desc.head))\n+\t\treturn -ENOSPC;\n+\n+\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n+\tSTRM_INC(dpi_conf->c_desc, tail);\n+\n+\tcmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;\n+\tcmd[1] = (uint64_t)comp_ptr;\n+\tcmd[2] = 0;\n+\tcmd[4] = length;\n+\tcmd[5] = src;\n+\tcmd[6] = length;\n+\tcmd[7] = dst;\n+\n+\trc = __dpi_queue_write_single(dpivf, cmd);\n+\tif (unlikely(rc)) {\n+\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n+\t\treturn rc;\n+\t}\n+\n+\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n+\t\trte_wmb();\n+\t\tplt_write64(dpi_conf->pnum_words + DMA_DW_PER_SINGLE_CMD,\n+\t\t\t    dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n+\t\tdpi_conf->stats.submitted += dpi_conf->pending + 1;\n+\t\tdpi_conf->pnum_words = 0;\n+\t\tdpi_conf->pending = 0;\n+\t} else {\n+\t\tdpi_conf->pnum_words += 8;\n+\t\tdpi_conf->pending++;\n+\t}\n+\n+\treturn dpi_conf->desc_idx++;\n+}\n+\n+int\n+cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,\n+\t\t     const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,\n+\t\t     uint64_t flags)\n+{\n+\tstruct cnxk_dpi_vf_s *dpivf = dev_private;\n+\tstruct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];\n+\tstruct cnxk_dpi_compl_s *comp_ptr;\n+\tuint64_t hdr[4];\n+\tint rc;\n+\n+\tif (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==\n+\t\t     dpi_conf->c_desc.head))\n+\t\treturn -ENOSPC;\n+\n+\tcomp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];\n+\tSTRM_INC(dpi_conf->c_desc, tail);\n+\n+\thdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;\n+\thdr[1] = (uint64_t)comp_ptr;\n+\thdr[2] = 0;\n+\n+\trc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst);\n+\tif (unlikely(rc)) {\n+\t\tSTRM_DEC(dpi_conf->c_desc, tail);\n+\t\treturn rc;\n+\t}\n+\n+\tif (flags & RTE_DMA_OP_FLAG_SUBMIT) {\n+\t\trte_wmb();\n+\t\tplt_write64(dpi_conf->pnum_words + DMA_CMD_LEN(nb_src, nb_dst),\n+\t\t\t    dpivf->rdpi.rbase + DPI_VDMA_DBELL);\n+\t\tdpi_conf->stats.submitted += dpi_conf->pending + 1;\n+\t\tdpi_conf->pnum_words = 0;\n+\t\tdpi_conf->pending = 0;\n+\t} else {\n+\t\tdpi_conf->pnum_words += DMA_CMD_LEN(nb_src, nb_dst);\n+\t\tdpi_conf->pending++;\n+\t}\n+\n+\treturn dpi_conf->desc_idx++;\n+}\ndiff --git a/drivers/dma/cnxk/meson.build b/drivers/dma/cnxk/meson.build\nindex b868fb14cb..a35b3a3b70 100644\n--- a/drivers/dma/cnxk/meson.build\n+++ b/drivers/dma/cnxk/meson.build\n@@ -2,5 +2,5 @@\n # Copyright(C) 2021 Marvell International Ltd.\n \n deps += ['bus_pci', 'common_cnxk', 'dmadev']\n-sources = files('cnxk_dmadev.c')\n+sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c')\n require_iova_in_mbuf = false\n",
    "prefixes": [
        "2/2"
    ]
}