get:
Show a patch.

patch:
Partially update a patch; only the fields supplied in the request body are changed.

put:
Update a patch, replacing all writable fields.
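Before the captured response below, here is a minimal sketch of driving these operations from Python with the requests library. It assumes the server accepts token authentication ("Authorization: Token <key>") and that the caller has maintainer rights for write operations; the token value is a hypothetical placeholder, and read-only GET requests are typically allowed without authentication. The GET form of this endpoint produced the capture that follows.

import requests

BASE = "http://patchwork.dpdk.org/api"
TOKEN = "REPLACE_WITH_API_TOKEN"  # hypothetical placeholder, not a real token
HEADERS = {"Authorization": f"Token {TOKEN}"}

# get: fetch the patch shown below and print a few of its fields
resp = requests.get(f"{BASE}/patches/120201/", headers=HEADERS)
resp.raise_for_status()
patch = resp.json()
print(patch["name"], patch["state"], patch["check"])

# patch: partial update -- only the supplied fields change
upd = requests.patch(f"{BASE}/patches/120201/", headers=HEADERS,
                     json={"state": "accepted", "archived": True})
upd.raise_for_status()

# put: full update -- all writable fields are expected in the body
# (omitted here; the payload shape mirrors the GET response)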

GET /api/patches/120201/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 120201,
    "url": "http://patchwork.dpdk.org/api/patches/120201/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20221128095442.3185112-5-ndabilpuram@marvell.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20221128095442.3185112-5-ndabilpuram@marvell.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20221128095442.3185112-5-ndabilpuram@marvell.com",
    "date": "2022-11-28T09:54:36",
    "name": "[05/11] event/cnxk: net/cnxk: support transmit completion",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "ae08c7f6aac83c5a73fb679854884eb10efaae0e",
    "submitter": {
        "id": 1202,
        "url": "http://patchwork.dpdk.org/api/people/1202/?format=api",
        "name": "Nithin Dabilpuram",
        "email": "ndabilpuram@marvell.com"
    },
    "delegate": {
        "id": 310,
        "url": "http://patchwork.dpdk.org/api/users/310/?format=api",
        "username": "jerin",
        "first_name": "Jerin",
        "last_name": "Jacob",
        "email": "jerinj@marvell.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20221128095442.3185112-5-ndabilpuram@marvell.com/mbox/",
    "series": [
        {
            "id": 25906,
            "url": "http://patchwork.dpdk.org/api/series/25906/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=25906",
            "date": "2022-11-28T09:54:32",
            "name": "[01/11] common/cnxk: free pending sqe buffers",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/25906/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/120201/comments/",
    "check": "warning",
    "checks": "http://patchwork.dpdk.org/api/patches/120201/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id E103BA00C3;\n\tMon, 28 Nov 2022 10:55:19 +0100 (CET)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 4355042C76;\n\tMon, 28 Nov 2022 10:55:07 +0100 (CET)",
            "from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com\n [67.231.148.174])\n by mails.dpdk.org (Postfix) with ESMTP id 1125E42BAC\n for <dev@dpdk.org>; Mon, 28 Nov 2022 10:55:05 +0100 (CET)",
            "from pps.filterd (m0045849.ppops.net [127.0.0.1])\n by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id\n 2AS5ifW6025002 for <dev@dpdk.org>; Mon, 28 Nov 2022 01:55:05 -0800",
            "from dc5-exch01.marvell.com ([199.233.59.181])\n by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3m4q3srsey-3\n (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)\n for <dev@dpdk.org>; Mon, 28 Nov 2022 01:55:04 -0800",
            "from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.2;\n Mon, 28 Nov 2022 01:55:03 -0800",
            "from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com\n (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend\n Transport; Mon, 28 Nov 2022 01:55:03 -0800",
            "from hyd1588t430.caveonetworks.com (unknown [10.29.52.204])\n by maili.marvell.com (Postfix) with ESMTP id DFBD23F704D;\n Mon, 28 Nov 2022 01:54:59 -0800 (PST)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;\n h=from : to : cc :\n subject : date : message-id : in-reply-to : references : mime-version :\n content-transfer-encoding : content-type; s=pfpt0220;\n bh=nk/tfiZ/p/3U/Wc2f8NwI5cLbLP3kb1lshagDxKWCr8=;\n b=BHp5X9aVj8tbnfe3SdJYK653g5pnu0e6UhgE0qhEPh5wBUurfmfm7n5CQ7CpQqh9ayHs\n bUOwKW2Mrua49Iy3TRe59Sj1Vh5JSR6D7aRjQq1EYKbzs7hAAaxSlPXf4Zi8nNkUEEvD\n AjmDSbd0WtbKEGoDXKIvzLFWUOzFH8Zmv+xgAMKP5baCYkcpLzJAJhwcFW85CQyzuEie\n gI/YU8sUOobvtpoN0i7uCS41+vVbAfMv9iu1iSHv2m3tcAdrQw2kdAsWn7RigOSCx3vu\n l94BEsI6JQGY1DRH6p5Tugxr8i/KunJ/pCE3Qwa4xiHGNvsdPqC5dHy2oxKXTbmBoL5D wg==",
        "From": "Nithin Dabilpuram <ndabilpuram@marvell.com>",
        "To": "Nithin Dabilpuram <ndabilpuram@marvell.com>, Kiran Kumar K\n <kirankumark@marvell.com>, Sunil Kumar Kori <skori@marvell.com>, Satha Rao\n <skoteshwar@marvell.com>, Pavan Nikhilesh <pbhagavatula@marvell.com>,\n \"Shijith Thotton\" <sthotton@marvell.com>",
        "CC": "<jerinj@marvell.com>, <dev@dpdk.org>, Rakesh Kudurumalla\n <rkudurumalla@marvell.com>",
        "Subject": "[PATCH 05/11] event/cnxk: net/cnxk: support transmit completion",
        "Date": "Mon, 28 Nov 2022 15:24:36 +0530",
        "Message-ID": "<20221128095442.3185112-5-ndabilpuram@marvell.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20221128095442.3185112-1-ndabilpuram@marvell.com>",
        "References": "<20221128095442.3185112-1-ndabilpuram@marvell.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Proofpoint-ORIG-GUID": "0teo4WmPNVnjoqP2vJkVpHCouVGYUYcC",
        "X-Proofpoint-GUID": "0teo4WmPNVnjoqP2vJkVpHCouVGYUYcC",
        "X-Proofpoint-Virus-Version": "vendor=baseguard\n engine=ICAP:2.0.219,Aquarius:18.0.895,Hydra:6.0.545,FMLib:17.11.122.1\n definitions=2022-11-28_07,2022-11-25_01,2022-06-22_01",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "From: Rakesh Kudurumalla <rkudurumalla@marvell.com>\n\nadded support to call callback handler provided\nby user when external buffer is attached to mbuf\nand transmit completion is enabled.Added support to\nenable transmit completion as device args\n\nSigned-off-by: Rakesh Kudurumalla <rkudurumalla@marvell.com>\n---\n doc/guides/nics/cnxk.rst               |  14 ++\n drivers/event/cnxk/cn10k_worker.h      |   7 +-\n drivers/event/cnxk/cn9k_worker.h       |   8 +-\n drivers/net/cnxk/cn10k_ethdev.c        |  54 ++++++\n drivers/net/cnxk/cn10k_ethdev.h        |   1 +\n drivers/net/cnxk/cn10k_tx.h            | 215 +++++++++++++++++++----\n drivers/net/cnxk/cn9k_ethdev.c         |  54 ++++++\n drivers/net/cnxk/cn9k_ethdev.h         |   1 +\n drivers/net/cnxk/cn9k_tx.h             | 226 +++++++++++++++++++++----\n drivers/net/cnxk/cnxk_ethdev.c         |  28 ++-\n drivers/net/cnxk/cnxk_ethdev.h         |  17 ++\n drivers/net/cnxk/cnxk_ethdev_devargs.c |   6 +\n 12 files changed, 553 insertions(+), 78 deletions(-)",
    "diff": "diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst\nindex 7da6cb3967..be176b53a2 100644\n--- a/doc/guides/nics/cnxk.rst\n+++ b/doc/guides/nics/cnxk.rst\n@@ -361,6 +361,20 @@ Runtime Config Options\n \n       -a 0002:1d:00.0,sdp_channel_mask=0x700/0xf00\n \n+- ``Transmit completion handler`` (default ``0``)\n+\n+   When transmit completion handler is enabled , PMD invokes callback handler\n+   provided by application for every packet which has external buf attached to mbuf\n+   and frees main mbuf, external buffer is provided to applicatoin. Once external\n+   buffer is handed over to application, its application responsibility either to\n+   free of reuse external buffer\n+\n+   using ``tx_compl_ena`` ``devargs`` parameter.\n+\n+   For example::\n+\n+      -a 0002:01:00.1,tx_compl_ena=1\n+\n    With the above configuration, RTE Flow rules API will set the channel\n    and channel mask as 0x700 and 0xF00 in the MCAM entries of the  flow rules\n    created on the SDP device. This option needs to be used when more than one\ndiff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h\nindex 75a2ff244a..332a2e27c2 100644\n--- a/drivers/event/cnxk/cn10k_worker.h\n+++ b/drivers/event/cnxk/cn10k_worker.h\n@@ -559,6 +559,9 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,\n \tif (cn10k_sso_sq_depth(txq) <= 0)\n \t\treturn 0;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, 1, 1);\n+\n \tcn10k_nix_tx_skeleton(txq, cmd, flags, 0);\n \t/* Perform header writes before barrier\n \t * for TSO\n@@ -566,7 +569,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,\n \tif (flags & NIX_TX_OFFLOAD_TSO_F)\n \t\tcn10k_nix_xmit_prepare_tso(m, flags);\n \n-\tcn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec,\n+\tcn10k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, &sec,\n \t\t\t       txq->mark_flag, txq->mark_fmt);\n \n \tladdr = lmt_addr;\n@@ -581,7 +584,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,\n \tcn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);\n \n \tif (flags & NIX_TX_MULTI_SEG_F)\n-\t\tsegdw = cn10k_nix_prepare_mseg(m, (uint64_t *)laddr, flags);\n+\t\tsegdw = cn10k_nix_prepare_mseg(txq, m, (uint64_t *)laddr, flags);\n \telse\n \t\tsegdw = cn10k_nix_tx_ext_subs(flags) + 2;\n \ndiff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h\nindex 4c3932da47..54213db3b4 100644\n--- a/drivers/event/cnxk/cn9k_worker.h\n+++ b/drivers/event/cnxk/cn9k_worker.h\n@@ -781,12 +781,16 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,\n \t    !(flags & NIX_TX_OFFLOAD_SECURITY_F))\n \t\trte_io_wmb();\n \ttxq = cn9k_sso_hws_xtract_meta(m, txq_data);\n+\n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, 1, 1);\n+\n \tif (((txq->nb_sqb_bufs_adj -\n \t      __atomic_load_n((int16_t *)txq->fc_mem, __ATOMIC_RELAXED))\n \t     << txq->sqes_per_sqb_log2) <= 0)\n \t\treturn 0;\n \tcn9k_nix_tx_skeleton(txq, cmd, flags, 0);\n-\tcn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,\n+\tcn9k_nix_xmit_prepare(txq, m, cmd, flags, txq->lso_tun_fmt, txq->mark_flag,\n \t\t\t      txq->mark_fmt);\n \n \tif (flags & NIX_TX_OFFLOAD_SECURITY_F) {\n@@ -808,7 +812,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,\n \t}\n \n \tif (flags & NIX_TX_MULTI_SEG_F) {\n-\t\tconst uint16_t segdw 
= cn9k_nix_prepare_mseg(m, cmd, flags);\n+\t\tconst uint16_t segdw = cn9k_nix_prepare_mseg(txq, m, cmd, flags);\n \t\tcn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, segdw,\n \t\t\t\t\t     flags);\n \t\tif (!CNXK_TT_FROM_EVENT(ev->event)) {\ndiff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c\nindex 4658713591..61278bb72c 100644\n--- a/drivers/net/cnxk/cn10k_ethdev.c\n+++ b/drivers/net/cnxk/cn10k_ethdev.c\n@@ -50,6 +50,7 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)\n {\n \tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n \tuint64_t conf = dev->tx_offloads;\n+\tstruct roc_nix *nix = &dev->nix;\n \tuint16_t flags = 0;\n \n \t/* Fastpath is dependent on these enums */\n@@ -113,6 +114,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)\n \tif (dev->tx_mark)\n \t\tflags |= NIX_TX_OFFLOAD_VLAN_QINQ_F;\n \n+\tif (nix->tx_compl_ena)\n+\t\tflags |= NIX_TX_OFFLOAD_MBUF_NOFF_F;\n+\n \treturn flags;\n }\n \n@@ -165,6 +169,49 @@ nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn10k_eth_txq *txq,\n \trte_wmb();\n }\n \n+static int\n+cn10k_nix_tx_compl_setup(struct cnxk_eth_dev *dev,\n+\t\tstruct cn10k_eth_txq *txq,\n+\t\tstruct roc_nix_sq *sq, uint16_t nb_desc)\n+{\n+\tstruct roc_nix_cq *cq;\n+\n+\tcq = &dev->cqs[sq->cqid];\n+\ttxq->tx_compl.desc_base = (uintptr_t)cq->desc_base;\n+\ttxq->tx_compl.cq_door = cq->door;\n+\ttxq->tx_compl.cq_status = cq->status;\n+\ttxq->tx_compl.wdata = cq->wdata;\n+\ttxq->tx_compl.head = cq->head;\n+\ttxq->tx_compl.qmask = cq->qmask;\n+\t/* Total array size holding buffers is equal to\n+\t * number of entries in cq and sq\n+\t * max buffer in array = desc in cq + desc in sq\n+\t */\n+\ttxq->tx_compl.nb_desc_mask = (2 * rte_align32pow2(nb_desc)) - 1;\n+\ttxq->tx_compl.ena = true;\n+\n+\ttxq->tx_compl.ptr = (struct rte_mbuf **)plt_zmalloc(txq->tx_compl.nb_desc_mask *\n+\t\t\tsizeof(struct rte_mbuf *), 0);\n+\tif (!txq->tx_compl.ptr)\n+\t\treturn -1;\n+\n+\treturn 0;\n+}\n+\n+static void\n+cn10k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)\n+{\n+\tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n+\tstruct roc_nix *nix = &dev->nix;\n+\tstruct cn10k_eth_txq *txq;\n+\n+\tcnxk_nix_tx_queue_release(eth_dev, qid);\n+\ttxq = eth_dev->data->tx_queues[qid];\n+\n+\tif (nix->tx_compl_ena)\n+\t\tplt_free(txq->tx_compl.ptr);\n+}\n+\n static int\n cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \t\t\t uint16_t nb_desc, unsigned int socket,\n@@ -191,6 +238,12 @@ cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \t/* Update fast path queue */\n \ttxq = eth_dev->data->tx_queues[qid];\n \ttxq->fc_mem = sq->fc;\n+\tif (nix->tx_compl_ena) {\n+\t\trc = cn10k_nix_tx_compl_setup(dev, txq, sq, nb_desc);\n+\t\tif (rc)\n+\t\t\treturn rc;\n+\t}\n+\n \t/* Store lmt base in tx queue for easy access */\n \ttxq->lmt_base = nix->lmt_base;\n \ttxq->io_addr = sq->io_addr;\n@@ -711,6 +764,7 @@ nix_eth_dev_ops_override(void)\n \tcnxk_eth_dev_ops.dev_configure = cn10k_nix_configure;\n \tcnxk_eth_dev_ops.tx_queue_setup = cn10k_nix_tx_queue_setup;\n \tcnxk_eth_dev_ops.rx_queue_setup = cn10k_nix_rx_queue_setup;\n+\tcnxk_eth_dev_ops.tx_queue_release = cn10k_nix_tx_queue_release;\n \tcnxk_eth_dev_ops.tx_queue_stop = cn10k_nix_tx_queue_stop;\n \tcnxk_eth_dev_ops.dev_start = cn10k_nix_dev_start;\n \tcnxk_eth_dev_ops.dev_ptypes_set = cn10k_nix_ptypes_set;\ndiff --git a/drivers/net/cnxk/cn10k_ethdev.h b/drivers/net/cnxk/cn10k_ethdev.h\nindex 948c8348ad..c843ba9881 100644\n--- 
a/drivers/net/cnxk/cn10k_ethdev.h\n+++ b/drivers/net/cnxk/cn10k_ethdev.h\n@@ -24,6 +24,7 @@ struct cn10k_eth_txq {\n \tuint64_t ts_mem;\n \tuint64_t mark_flag : 8;\n \tuint64_t mark_fmt : 48;\n+\tstruct cnxk_eth_txq_comp tx_compl;\n } __plt_cache_aligned;\n \n struct cn10k_eth_rxq {\ndiff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h\nindex 09c332b2b5..c51de742ad 100644\n--- a/drivers/net/cnxk/cn10k_tx.h\n+++ b/drivers/net/cnxk/cn10k_tx.h\n@@ -643,6 +643,28 @@ cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,\n }\n #endif\n \n+static __rte_always_inline uint64_t\n+cn10k_nix_prefree_seg(struct rte_mbuf *m, struct cn10k_eth_txq *txq,\n+\t\tstruct nix_send_hdr_s *send_hdr)\n+{\n+\tuint32_t sqe_id;\n+\n+\tif (RTE_MBUF_HAS_EXTBUF(m)) {\n+\t\tif (send_hdr->w0.pnc) {\n+\t\t\ttxq->tx_compl.ptr[send_hdr->w1.sqe_id]->next = m;\n+\t\t} else {\n+\t\t\tsqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);\n+\t\t\tsend_hdr->w0.pnc = 1;\n+\t\t\tsend_hdr->w1.sqe_id = sqe_id &\n+\t\t\t\ttxq->tx_compl.nb_desc_mask;\n+\t\t\ttxq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;\n+\t\t}\n+\t\treturn 1;\n+\t} else {\n+\t\treturn cnxk_nix_prefree_seg(m);\n+\t}\n+}\n+\n static __rte_always_inline void\n cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)\n {\n@@ -696,7 +718,8 @@ cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)\n }\n \n static __rte_always_inline void\n-cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n+cn10k_nix_xmit_prepare(struct cn10k_eth_txq *txq,\n+\t\t       struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n \t\t       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,\n \t\t       uint64_t mark_fmt)\n {\n@@ -888,7 +911,7 @@ cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n \t\t\t *\t\tis greater than 1\n \t\t\t * DF bit = 0 otherwise\n \t\t\t */\n-\t\t\tsend_hdr->w0.df = cnxk_nix_prefree_seg(m);\n+\t\t\tsend_hdr->w0.df = cn10k_nix_prefree_seg(m, txq, send_hdr);\n \t\t}\n \t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n \t\tif (!send_hdr->w0.df)\n@@ -959,7 +982,8 @@ cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,\n }\n \n static __rte_always_inline uint16_t\n-cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n+cn10k_nix_prepare_mseg(struct cn10k_eth_txq *txq,\n+\t\t       struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n {\n \tuint64_t prefree = 0, aura0, aura, nb_segs, segdw;\n \tstruct nix_send_hdr_s *send_hdr;\n@@ -993,7 +1017,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n \n \t/* Set invert df if buffer is not to be freed by H/W */\n \tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n-\t\tprefree = cnxk_nix_prefree_seg(m);\n+\t\tprefree = cn10k_nix_prefree_seg(m, txq, send_hdr);\n \t\tl_sg.i1 = prefree;\n \t}\n \n@@ -1035,7 +1059,7 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n \n \t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n \t\t\taura = roc_npa_aura_handle_to_aura(m->pool->pool_id);\n-\t\t\tprefree = cnxk_nix_prefree_seg(m);\n+\t\t\tprefree = cn10k_nix_prefree_seg(m, txq, send_hdr);\n \t\t\tis_sg2 = aura != aura0 && !prefree;\n \t\t}\n \n@@ -1119,6 +1143,83 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n \treturn segdw;\n }\n \n+static inline uint16_t\n+nix_tx_compl_nb_pkts(struct cn10k_eth_txq *txq, const uint64_t 
wdata,\n+\t\tconst uint16_t pkts, const uint32_t qmask)\n+{\n+\tuint32_t available = txq->tx_compl.available;\n+\n+\t/* Update the available count if cached value is not enough */\n+\tif (unlikely(available < pkts)) {\n+\t\tuint64_t reg, head, tail;\n+\n+\t\t/* Use LDADDA version to avoid reorder */\n+\t\treg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);\n+\t\t/* CQ_OP_STATUS operation error */\n+\t\tif (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) ||\n+\t\t\t\treg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))\n+\t\t\treturn 0;\n+\n+\t\ttail = reg & 0xFFFFF;\n+\t\thead = (reg >> 20) & 0xFFFFF;\n+\t\tif (tail < head)\n+\t\t\tavailable = tail - head + qmask + 1;\n+\t\telse\n+\t\t\tavailable = tail - head;\n+\n+\t\ttxq->tx_compl.available = available;\n+\t}\n+\treturn RTE_MIN(pkts, available);\n+}\n+\n+static inline void\n+handle_tx_completion_pkts(struct cn10k_eth_txq *txq, const uint16_t pkts,\n+\t\t\t  uint8_t mt_safe)\n+{\n+#define CNXK_NIX_CQ_ENTRY_SZ 128\n+#define CQE_SZ(x)            ((x) * CNXK_NIX_CQ_ENTRY_SZ)\n+\n+\tuint16_t tx_pkts = 0, nb_pkts;\n+\tconst uintptr_t desc = txq->tx_compl.desc_base;\n+\tconst uint64_t wdata = txq->tx_compl.wdata;\n+\tconst uint32_t qmask = txq->tx_compl.qmask;\n+\tuint32_t head = txq->tx_compl.head;\n+\tstruct nix_cqe_hdr_s *tx_compl_cq;\n+\tstruct nix_send_comp_s *tx_compl_s0;\n+\tstruct rte_mbuf *m_next, *m;\n+\n+\tif (mt_safe)\n+\t\trte_spinlock_lock(&txq->tx_compl.ext_buf_lock);\n+\n+\tnb_pkts = nix_tx_compl_nb_pkts(txq, wdata, pkts, qmask);\n+\twhile (tx_pkts < nb_pkts) {\n+\t\trte_prefetch_non_temporal((void *)(desc +\n+\t\t\t\t\t(CQE_SZ((head + 2) & qmask))));\n+\t\ttx_compl_cq = (struct nix_cqe_hdr_s *)\n+\t\t\t(desc + CQE_SZ(head));\n+\t\ttx_compl_s0 = (struct nix_send_comp_s *)\n+\t\t\t((uint64_t *)tx_compl_cq + 1);\n+\t\tm = txq->tx_compl.ptr[tx_compl_s0->sqe_id];\n+\t\twhile (m->next != NULL) {\n+\t\t\tm_next = m->next;\n+\t\t\trte_pktmbuf_free_seg(m);\n+\t\t\tm = m_next;\n+\t\t}\n+\t\trte_pktmbuf_free_seg(m);\n+\n+\t\thead++;\n+\t\thead &= qmask;\n+\t\ttx_pkts++;\n+\t}\n+\ttxq->tx_compl.head = head;\n+\ttxq->tx_compl.available -= nb_pkts;\n+\n+\tplt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);\n+\n+\tif (mt_safe)\n+\t\trte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);\n+}\n+\n static __rte_always_inline uint16_t\n cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,\n \t\t    uint16_t pkts, uint64_t *cmd, const uint16_t flags)\n@@ -1139,6 +1240,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,\n \tuint64_t data;\n \tbool sec;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);\n+\n \tif (!(flags & NIX_TX_VWQE_F)) {\n \t\tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \t\t/* Reduce the cached count */\n@@ -1181,7 +1285,7 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,\n \t\tif (flags & NIX_TX_OFFLOAD_TSO_F)\n \t\t\tcn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);\n \n-\t\tcn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,\n+\t\tcn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,\n \t\t\t\t       &sec, mark_flag, mark_fmt);\n \n \t\tladdr = (uintptr_t)LMT_OFF(lbase, lnum, 0);\n@@ -1285,6 +1389,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,\n \tuintptr_t laddr;\n \tbool sec;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);\n+\n \tif (!(flags & NIX_TX_VWQE_F)) {\n 
\t\tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \t\t/* Reduce the cached count */\n@@ -1331,7 +1438,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,\n \t\tif (flags & NIX_TX_OFFLOAD_TSO_F)\n \t\t\tcn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);\n \n-\t\tcn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,\n+\t\tcn10k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,\n \t\t\t\t       &sec, mark_flag, mark_fmt);\n \n \t\tladdr = (uintptr_t)LMT_OFF(lbase, lnum, 0);\n@@ -1345,7 +1452,7 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,\n \t\t/* Move NIX desc to LMT/NIXTX area */\n \t\tcn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);\n \t\t/* Store sg list directly on lmt line */\n-\t\tsegdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,\n+\t\tsegdw = cn10k_nix_prepare_mseg(txq, tx_pkts[i], (uint64_t *)laddr,\n \t\t\t\t\t       flags);\n \t\tcn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,\n \t\t\t\t\t      segdw, flags);\n@@ -1467,7 +1574,8 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n }\n \n static __rte_always_inline uint16_t\n-cn10k_nix_prepare_mseg_vec_noff(struct rte_mbuf *m, uint64_t *cmd,\n+cn10k_nix_prepare_mseg_vec_noff(struct cn10k_eth_txq *txq,\n+\t\t\t\tstruct rte_mbuf *m, uint64_t *cmd,\n \t\t\t\tuint64x2_t *cmd0, uint64x2_t *cmd1,\n \t\t\t\tuint64x2_t *cmd2, uint64x2_t *cmd3,\n \t\t\t\tconst uint32_t flags)\n@@ -1482,7 +1590,7 @@ cn10k_nix_prepare_mseg_vec_noff(struct rte_mbuf *m, uint64_t *cmd,\n \t\tvst1q_u64(cmd + 2, *cmd1); /* sg */\n \t}\n \n-\tsegdw = cn10k_nix_prepare_mseg(m, cmd, flags);\n+\tsegdw = cn10k_nix_prepare_mseg(txq, m, cmd, flags);\n \n \tif (flags & NIX_TX_OFFLOAD_TSTAMP_F)\n \t\tvst1q_u64(cmd + segdw * 2 - 2, *cmd3);\n@@ -1581,7 +1689,8 @@ cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n #define NIX_DESCS_PER_LOOP 4\n \n static __rte_always_inline uint8_t\n-cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,\n+cn10k_nix_prep_lmt_mseg_vector(struct cn10k_eth_txq *txq,\n+\t\t\t       struct rte_mbuf **mbufs, uint64x2_t *cmd0,\n \t\t\t       uint64x2_t *cmd1, uint64x2_t *cmd2,\n \t\t\t       uint64x2_t *cmd3, uint8_t *segdw,\n \t\t\t       uint64_t *lmt_addr, __uint128_t *data128,\n@@ -1599,7 +1708,7 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,\n \t\t\t\tlmt_addr += 16;\n \t\t\t\toff = 0;\n \t\t\t}\n-\t\t\toff += cn10k_nix_prepare_mseg_vec_noff(mbufs[j],\n+\t\t\toff += cn10k_nix_prepare_mseg_vec_noff(txq, mbufs[j],\n \t\t\t\t\tlmt_addr + off * 2, &cmd0[j], &cmd1[j],\n \t\t\t\t\t&cmd2[j], &cmd3[j], flags);\n \t\t}\n@@ -1741,14 +1850,15 @@ cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,\n }\n \n static __rte_always_inline void\n-cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,\n+cn10k_nix_xmit_store(struct cn10k_eth_txq *txq,\n+\t\t     struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,\n \t\t     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,\n \t\t     uint64x2_t cmd3, const uint16_t flags)\n {\n \tuint8_t off;\n \n \tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n-\t\tcn10k_nix_prepare_mseg_vec_noff(mbuf, LMT_OFF(laddr, 0, 0),\n+\t\tcn10k_nix_prepare_mseg_vec_noff(txq, mbuf, LMT_OFF(laddr, 0, 0),\n \t\t\t\t\t\t&cmd0, &cmd1, &cmd2, &cmd3,\n \t\t\t\t\t\tflags);\n \t\treturn;\n@@ -1816,9 +1926,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \tuint64x2_t sgdesc01_w0, sgdesc23_w0;\n \tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n 
\tstruct cn10k_eth_txq *txq = tx_queue;\n+\tuint64x2_t xmask01_w0, xmask23_w0;\n+\tuint64x2_t xmask01_w1, xmask23_w1;\n \trte_iova_t io_addr = txq->io_addr;\n \tuintptr_t laddr = txq->lmt_base;\n \tuint8_t c_lnum, c_shft, c_loff;\n+\tstruct nix_send_hdr_s send_hdr;\n \tuint64x2_t ltypes01, ltypes23;\n \tuint64x2_t xtmp128, ytmp128;\n \tuint64x2_t xmask01, xmask23;\n@@ -1831,6 +1944,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\tuint64_t data[2];\n \t} wd;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, flags & NIX_TX_VWQE_F);\n+\n \tif (!(flags & NIX_TX_VWQE_F)) {\n \t\tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \t\tscalar = pkts & (NIX_DESCS_PER_LOOP - 1);\n@@ -2664,8 +2780,10 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t    !(flags & NIX_TX_MULTI_SEG_F) &&\n \t\t    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n-\t\t\txmask01 = vdupq_n_u64(0);\n-\t\t\txmask23 = xmask01;\n+\t\t\txmask01_w0 = vdupq_n_u64(0);\n+\t\t\txmask01_w1 = vdupq_n_u64(0);\n+\t\t\txmask23_w0 = xmask01_w0;\n+\t\t\txmask23_w1 = xmask01_w1;\n \n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n@@ -2673,35 +2791,62 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\tmbuf2 = (uint64_t *)tx_pkts[2];\n \t\t\tmbuf3 = (uint64_t *)tx_pkts[3];\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))\n-\t\t\t\txmask01 = vsetq_lane_u64(0x80000, xmask01, 0);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf0, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 0);\n+\t\t\t\txmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 0);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf0)->pool,\n \t\t\t\t\t(void **)&mbuf0, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))\n-\t\t\t\txmask01 = vsetq_lane_u64(0x80000, xmask01, 1);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf1, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 1);\n+\t\t\t\txmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 1);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf1)->pool,\n \t\t\t\t\t(void **)&mbuf1, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))\n-\t\t\t\txmask23 = vsetq_lane_u64(0x80000, xmask23, 0);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf2, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 0);\n+\t\t\t\txmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 0);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf2)->pool,\n \t\t\t\t\t(void **)&mbuf2, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))\n-\t\t\t\txmask23 = vsetq_lane_u64(0x80000, xmask23, 1);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn10k_nix_prefree_seg((struct rte_mbuf *)mbuf3, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 1);\n+\t\t\t\txmask23_w1 = 
vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 1);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf3)->pool,\n \t\t\t\t\t(void **)&mbuf3, 1, 0);\n-\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n-\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);\n+\t\t\t}\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01_w0);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23_w0);\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, xmask01_w1);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, xmask23_w1);\n \t\t} else if (!(flags & NIX_TX_MULTI_SEG_F) &&\n \t\t\t   !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {\n \t\t\t/* Move mbufs to iova */\n@@ -2773,7 +2918,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\t\t\t\t   &shift, &wd.data128, &next);\n \n \t\t\t/* Store mbuf0 to LMTLINE/CPT NIXTX area */\n-\t\t\tcn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,\n+\t\t\tcn10k_nix_xmit_store(txq, tx_pkts[0], segdw[0], next,\n \t\t\t\t\t     cmd0[0], cmd1[0], cmd2[0], cmd3[0],\n \t\t\t\t\t     flags);\n \n@@ -2789,7 +2934,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\t\t\t\t   &shift, &wd.data128, &next);\n \n \t\t\t/* Store mbuf1 to LMTLINE/CPT NIXTX area */\n-\t\t\tcn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,\n+\t\t\tcn10k_nix_xmit_store(txq, tx_pkts[1], segdw[1], next,\n \t\t\t\t\t     cmd0[1], cmd1[1], cmd2[1], cmd3[1],\n \t\t\t\t\t     flags);\n \n@@ -2805,7 +2950,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\t\t\t\t   &shift, &wd.data128, &next);\n \n \t\t\t/* Store mbuf2 to LMTLINE/CPT NIXTX area */\n-\t\t\tcn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,\n+\t\t\tcn10k_nix_xmit_store(txq, tx_pkts[2], segdw[2], next,\n \t\t\t\t\t     cmd0[2], cmd1[2], cmd2[2], cmd3[2],\n \t\t\t\t\t     flags);\n \n@@ -2821,7 +2966,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\t\t\t\t   &shift, &wd.data128, &next);\n \n \t\t\t/* Store mbuf3 to LMTLINE/CPT NIXTX area */\n-\t\t\tcn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,\n+\t\t\tcn10k_nix_xmit_store(txq, tx_pkts[3], segdw[3], next,\n \t\t\t\t\t     cmd0[3], cmd1[3], cmd2[3], cmd3[3],\n \t\t\t\t\t     flags);\n \n@@ -2829,7 +2974,7 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,\n \t\t\tuint8_t j;\n \n \t\t\tsegdw[4] = 8;\n-\t\t\tj = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,\n+\t\t\tj = cn10k_nix_prep_lmt_mseg_vector(txq, tx_pkts, cmd0, cmd1,\n \t\t\t\t\t\t\t  cmd2, cmd3, segdw,\n \t\t\t\t\t\t\t  (uint64_t *)\n \t\t\t\t\t\t\t  LMT_OFF(laddr, lnum,\ndiff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c\nindex 3b702d9696..749214cf23 100644\n--- a/drivers/net/cnxk/cn9k_ethdev.c\n+++ b/drivers/net/cnxk/cn9k_ethdev.c\n@@ -50,6 +50,7 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)\n {\n \tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n \tuint64_t conf = dev->tx_offloads;\n+\tstruct roc_nix *nix = &dev->nix;\n \tuint16_t flags = 0;\n \n \t/* Fastpath is dependent on these enums */\n@@ -113,6 +114,9 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev)\n \tif (dev->tx_mark)\n \t\tflags |= NIX_TX_OFFLOAD_VLAN_QINQ_F;\n \n+\tif (nix->tx_compl_ena)\n+\t\tflags |= NIX_TX_OFFLOAD_MBUF_NOFF_F;\n+\n \treturn flags;\n }\n \n@@ -165,12 +169,56 @@ nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn9k_eth_txq *txq,\n \trte_wmb();\n }\n \n+static int\n+cn9k_nix_tx_compl_setup(struct cnxk_eth_dev *dev,\n+\t\tstruct cn9k_eth_txq *txq,\n+\t\tstruct roc_nix_sq *sq, uint16_t 
nb_desc)\n+{\n+\tstruct roc_nix_cq *cq;\n+\n+\tcq = &dev->cqs[sq->cqid];\n+\ttxq->tx_compl.desc_base = (uintptr_t)cq->desc_base;\n+\ttxq->tx_compl.cq_door = cq->door;\n+\ttxq->tx_compl.cq_status = cq->status;\n+\ttxq->tx_compl.wdata = cq->wdata;\n+\ttxq->tx_compl.head = cq->head;\n+\ttxq->tx_compl.qmask = cq->qmask;\n+\t/* Total array size holding buffers is equal to\n+\t * number of entries in cq and sq\n+\t * max buffer in array = desc in cq + desc in sq\n+\t */\n+\ttxq->tx_compl.nb_desc_mask = (2 * rte_align32pow2(nb_desc)) - 1;\n+\ttxq->tx_compl.ena = true;\n+\n+\ttxq->tx_compl.ptr = (struct rte_mbuf **)plt_zmalloc(txq->tx_compl.nb_desc_mask *\n+\t\t\tsizeof(struct rte_mbuf *), 0);\n+\tif (!txq->tx_compl.ptr)\n+\t\treturn -1;\n+\n+\treturn 0;\n+}\n+\n+static void\n+cn9k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)\n+{\n+\tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n+\tstruct roc_nix *nix = &dev->nix;\n+\tstruct cn9k_eth_txq *txq;\n+\n+\tcnxk_nix_tx_queue_release(eth_dev, qid);\n+\ttxq = eth_dev->data->tx_queues[qid];\n+\n+\tif (nix->tx_compl_ena)\n+\t\tplt_free(txq->tx_compl.ptr);\n+}\n+\n static int\n cn9k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \t\t\tuint16_t nb_desc, unsigned int socket,\n \t\t\tconst struct rte_eth_txconf *tx_conf)\n {\n \tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n+\tstruct roc_nix *nix = &dev->nix;\n \tuint64_t mark_fmt, mark_flag;\n \tstruct roc_cpt_lf *inl_lf;\n \tstruct cn9k_eth_txq *txq;\n@@ -190,6 +238,11 @@ cn9k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \t/* Update fast path queue */\n \ttxq = eth_dev->data->tx_queues[qid];\n \ttxq->fc_mem = sq->fc;\n+\tif (nix->tx_compl_ena) {\n+\t\trc = cn9k_nix_tx_compl_setup(dev, txq, sq, nb_desc);\n+\t\tif (rc)\n+\t\t\treturn rc;\n+\t}\n \ttxq->lmt_addr = sq->lmt_addr;\n \ttxq->io_addr = sq->io_addr;\n \ttxq->nb_sqb_bufs_adj = sq->nb_sqb_bufs_adj;\n@@ -634,6 +687,7 @@ nix_eth_dev_ops_override(void)\n \t/* Update platform specific ops */\n \tcnxk_eth_dev_ops.dev_configure = cn9k_nix_configure;\n \tcnxk_eth_dev_ops.tx_queue_setup = cn9k_nix_tx_queue_setup;\n+\tcnxk_eth_dev_ops.tx_queue_release = cn9k_nix_tx_queue_release;\n \tcnxk_eth_dev_ops.rx_queue_setup = cn9k_nix_rx_queue_setup;\n \tcnxk_eth_dev_ops.tx_queue_stop = cn9k_nix_tx_queue_stop;\n \tcnxk_eth_dev_ops.dev_start = cn9k_nix_dev_start;\ndiff --git a/drivers/net/cnxk/cn9k_ethdev.h b/drivers/net/cnxk/cn9k_ethdev.h\nindex 472a4b06da..a82dcb3d19 100644\n--- a/drivers/net/cnxk/cn9k_ethdev.h\n+++ b/drivers/net/cnxk/cn9k_ethdev.h\n@@ -24,6 +24,7 @@ struct cn9k_eth_txq {\n \tuint16_t cpt_desc;\n \tuint64_t mark_flag : 8;\n \tuint64_t mark_fmt : 48;\n+\tstruct cnxk_eth_txq_comp tx_compl;\n } __plt_cache_aligned;\n \n struct cn9k_eth_rxq {\ndiff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h\nindex 404edd6aed..17bbdce3a0 100644\n--- a/drivers/net/cnxk/cn9k_tx.h\n+++ b/drivers/net/cnxk/cn9k_tx.h\n@@ -81,6 +81,28 @@ cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,\n \t}\n }\n \n+static __rte_always_inline uint64_t\n+cn9k_nix_prefree_seg(struct rte_mbuf *m, struct cn9k_eth_txq *txq,\n+\t\tstruct nix_send_hdr_s *send_hdr)\n+{\n+\tuint32_t sqe_id;\n+\n+\tif (RTE_MBUF_HAS_EXTBUF(m)) {\n+\t\tif (send_hdr->w0.pnc) {\n+\t\t\ttxq->tx_compl.ptr[send_hdr->w1.sqe_id]->next = m;\n+\t\t} else {\n+\t\t\tsqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);\n+\t\t\tsend_hdr->w0.pnc = 1;\n+\t\t\tsend_hdr->w1.sqe_id = sqe_id 
&\n+\t\t\t\ttxq->tx_compl.nb_desc_mask;\n+\t\t\ttxq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;\n+\t\t}\n+\t\treturn 1;\n+\t} else {\n+\t\treturn cnxk_nix_prefree_seg(m);\n+\t}\n+}\n+\n static __rte_always_inline void\n cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)\n {\n@@ -134,7 +156,8 @@ cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)\n }\n \n static __rte_always_inline void\n-cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n+cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq,\n+\t\t      struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n \t\t      const uint64_t lso_tun_fmt, uint8_t mark_flag,\n \t\t      uint64_t mark_fmt)\n {\n@@ -325,7 +348,7 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,\n \t\t\t *\t\tis greater than 1\n \t\t\t * DF bit = 0 otherwise\n \t\t\t */\n-\t\t\tsend_hdr->w0.df = cnxk_nix_prefree_seg(m);\n+\t\t\tsend_hdr->w0.df = cn9k_nix_prefree_seg(m, txq, send_hdr);\n \t\t\t/* Ensuring mbuf fields which got updated in\n \t\t\t * cnxk_nix_prefree_seg are written before LMTST.\n \t\t\t */\n@@ -401,7 +424,8 @@ cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)\n }\n \n static __rte_always_inline uint16_t\n-cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n+cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq,\n+\t\t      struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n {\n \tstruct nix_send_hdr_s *send_hdr;\n \tunion nix_send_sg_s *sg;\n@@ -429,7 +453,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n \n \t/* Set invert df if buffer is not to be freed by H/W */\n \tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n-\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\tsg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << 55);\n \t\trte_io_wmb();\n \t}\n \n@@ -450,7 +474,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)\n \t\t*slist = rte_mbuf_data_iova(m);\n \t\t/* Set invert df if buffer is not to be freed by H/W */\n \t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n-\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\tsg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << (i + 55));\n \t\t\t/* Commit changes to mbuf */\n \t\t\trte_io_wmb();\n \t\t}\n@@ -520,6 +544,83 @@ cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,\n \t} while (lmt_status == 0);\n }\n \n+static inline uint16_t\n+nix_tx_compl_nb_pkts(struct cn9k_eth_txq *txq, const uint64_t wdata,\n+\t\tconst uint16_t pkts, const uint32_t qmask)\n+{\n+\tuint32_t available = txq->tx_compl.available;\n+\n+\t/* Update the available count if cached value is not enough */\n+\tif (unlikely(available < pkts)) {\n+\t\tuint64_t reg, head, tail;\n+\n+\t\t/* Use LDADDA version to avoid reorder */\n+\t\treg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);\n+\t\t/* CQ_OP_STATUS operation error */\n+\t\tif (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) ||\n+\t\t\t\treg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))\n+\t\t\treturn 0;\n+\n+\t\ttail = reg & 0xFFFFF;\n+\t\thead = (reg >> 20) & 0xFFFFF;\n+\t\tif (tail < head)\n+\t\t\tavailable = tail - head + qmask + 1;\n+\t\telse\n+\t\t\tavailable = tail - head;\n+\n+\t\ttxq->tx_compl.available = available;\n+\t}\n+\treturn RTE_MIN(pkts, available);\n+}\n+\n+static inline void\n+handle_tx_completion_pkts(struct cn9k_eth_txq *txq, const uint16_t pkts,\n+\t\t\t  uint8_t mt_safe)\n+{\n+#define CNXK_NIX_CQ_ENTRY_SZ 128\n+#define CQE_SZ(x)            ((x) * 
CNXK_NIX_CQ_ENTRY_SZ)\n+\n+\tuint16_t tx_pkts = 0, nb_pkts;\n+\tconst uintptr_t desc = txq->tx_compl.desc_base;\n+\tconst uint64_t wdata = txq->tx_compl.wdata;\n+\tconst uint32_t qmask = txq->tx_compl.qmask;\n+\tuint32_t head = txq->tx_compl.head;\n+\tstruct nix_cqe_hdr_s *tx_compl_cq;\n+\tstruct nix_send_comp_s *tx_compl_s0;\n+\tstruct rte_mbuf *m_next, *m;\n+\n+\tif (mt_safe)\n+\t\trte_spinlock_lock(&txq->tx_compl.ext_buf_lock);\n+\n+\tnb_pkts = nix_tx_compl_nb_pkts(txq, wdata, pkts, qmask);\n+\twhile (tx_pkts < nb_pkts) {\n+\t\trte_prefetch_non_temporal((void *)(desc +\n+\t\t\t\t\t(CQE_SZ((head + 2) & qmask))));\n+\t\ttx_compl_cq = (struct nix_cqe_hdr_s *)\n+\t\t\t(desc + CQE_SZ(head));\n+\t\ttx_compl_s0 = (struct nix_send_comp_s *)\n+\t\t\t((uint64_t *)tx_compl_cq + 1);\n+\t\tm = txq->tx_compl.ptr[tx_compl_s0->sqe_id];\n+\t\twhile (m->next != NULL) {\n+\t\t\tm_next = m->next;\n+\t\t\trte_pktmbuf_free_seg(m);\n+\t\t\tm = m_next;\n+\t\t}\n+\t\trte_pktmbuf_free_seg(m);\n+\n+\t\thead++;\n+\t\thead &= qmask;\n+\t\ttx_pkts++;\n+\t}\n+\ttxq->tx_compl.head = head;\n+\ttxq->tx_compl.available -= nb_pkts;\n+\n+\tplt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);\n+\n+\tif (mt_safe)\n+\t\trte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);\n+}\n+\n static __rte_always_inline uint16_t\n cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,\n \t\t   uint64_t *cmd, const uint16_t flags)\n@@ -531,6 +632,9 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,\n \tuint8_t mark_flag = 0;\n \tuint16_t i;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, 0);\n+\n \tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \n \tcn9k_nix_tx_skeleton(txq, cmd, flags, 1);\n@@ -555,7 +659,7 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,\n \t\trte_io_wmb();\n \n \tfor (i = 0; i < pkts; i++) {\n-\t\tcn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,\n+\t\tcn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,\n \t\t\t\t      mark_flag, mark_fmt);\n \t\tcn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,\n \t\t\t\t\t     flags);\n@@ -580,6 +684,9 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint16_t segdw;\n \tuint64_t i;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, 0);\n+\n \tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \n \tcn9k_nix_tx_skeleton(txq, cmd, flags, 1);\n@@ -604,9 +711,9 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\trte_io_wmb();\n \n \tfor (i = 0; i < pkts; i++) {\n-\t\tcn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,\n+\t\tcn9k_nix_xmit_prepare(txq, tx_pkts[i], cmd, flags, lso_tun_fmt,\n \t\t\t\t      mark_flag, mark_fmt);\n-\t\tsegdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);\n+\t\tsegdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], cmd, flags);\n \t\tcn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,\n \t\t\t\t\t     segdw, flags);\n \t\tcn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);\n@@ -658,8 +765,9 @@ cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,\n }\n \n static __rte_always_inline uint8_t\n-cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n-\t\t\t       union nix_send_hdr_w0_u *sh,\n+cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,\n+\t\t\t       struct rte_mbuf *m, uint64_t *cmd,\n+\t\t\t       struct nix_send_hdr_s *send_hdr,\n \t\t\t       union 
nix_send_sg_s *sg, const uint32_t flags)\n {\n \tstruct rte_mbuf *m_next;\n@@ -668,7 +776,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n \tuint64_t segdw;\n \tint i = 1;\n \n-\tsh->total = m->pkt_len;\n+\tsend_hdr->w0.total = m->pkt_len;\n \t/* Clear sg->u header before use */\n \tsg->u &= 0xFC00000000000000;\n \tsg_u = sg->u;\n@@ -681,7 +789,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n \n \t/* Set invert df if buffer is not to be freed by H/W */\n \tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n-\t\tsg_u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\tsg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << 55);\n \t\t/* Mark mempool object as \"put\" since it is freed by NIX */\n #ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n \tif (!(sg_u & (1ULL << 55)))\n@@ -697,7 +805,7 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n \t\t*slist = rte_mbuf_data_iova(m);\n \t\t/* Set invert df if buffer is not to be freed by H/W */\n \t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)\n-\t\t\tsg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));\n+\t\t\tsg_u |= (cn9k_nix_prefree_seg(m, txq, send_hdr) << (i + 55));\n \t\t\t/* Mark mempool object as \"put\" since it is freed by NIX\n \t\t\t */\n #ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n@@ -731,24 +839,29 @@ cn9k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,\n \t/* Default dwords */\n \tsegdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +\n \t\t !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n-\tsh->sizem1 = segdw - 1;\n+\tsend_hdr->w0.sizem1 = segdw - 1;\n \n \treturn segdw;\n }\n \n static __rte_always_inline uint8_t\n-cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n+cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq,\n+\t\t\t  struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n \t\t\t  uint64x2_t *cmd1, const uint32_t flags)\n {\n-\tunion nix_send_hdr_w0_u sh;\n+\tstruct nix_send_hdr_s send_hdr;\n \tunion nix_send_sg_s sg;\n \tuint8_t ret;\n \n \tif (m->nb_segs == 1) {\n \t\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {\n+\t\t\tsend_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);\n+\t\t\tsend_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);\n \t\t\tsg.u = vgetq_lane_u64(cmd1[0], 0);\n-\t\t\tsg.u |= (cnxk_nix_prefree_seg(m) << 55);\n+\t\t\tsg.u |= (cn9k_nix_prefree_seg(m, txq, &send_hdr) << 55);\n \t\t\tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n+\t\t\tcmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);\n+\t\t\tcmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);\n \t\t}\n \n #ifdef RTE_LIBRTE_MEMPOOL_DEBUG\n@@ -761,12 +874,14 @@ cn9k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,\n \t\t       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);\n \t}\n \n-\tsh.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsend_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);\n+\tsend_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);\n \tsg.u = vgetq_lane_u64(cmd1[0], 0);\n \n-\tret = cn9k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);\n+\tret = cn9k_nix_prepare_mseg_vec_list(txq, m, cmd, &send_hdr, &sg, flags);\n \n-\tcmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);\n+\tcmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);\n+\tcmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);\n \tcmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);\n \treturn ret;\n }\n@@ -908,13 +1023,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint64x2_t sgdesc01_w1, sgdesc23_w1;\n \tstruct cn9k_eth_txq *txq = tx_queue;\n \tuint64_t *lmt_addr = txq->lmt_addr;\n+\tuint64x2_t xmask01_w0, xmask23_w0;\n+\tuint64x2_t xmask01_w1, xmask23_w1;\n \trte_iova_t 
io_addr = txq->io_addr;\n+\tstruct nix_send_hdr_s send_hdr;\n \tuint64x2_t ltypes01, ltypes23;\n \tuint64x2_t xtmp128, ytmp128;\n \tuint64x2_t xmask01, xmask23;\n \tuint64_t lmt_status, i;\n \tuint16_t pkts_left;\n \n+\tif (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)\n+\t\thandle_tx_completion_pkts(txq, pkts, 0);\n+\n \tNIX_XMIT_FC_OR_RETURN(txq, pkts);\n \n \tpkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);\n@@ -1672,8 +1793,10 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\tif ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&\n \t\t    !(flags & NIX_TX_MULTI_SEG_F)) {\n \t\t\t/* Set don't free bit if reference count > 1 */\n-\t\t\txmask01 = vdupq_n_u64(0);\n-\t\t\txmask23 = xmask01;\n+\t\t\txmask01_w0 = vdupq_n_u64(0);\n+\t\t\txmask01_w1 = vdupq_n_u64(0);\n+\t\t\txmask23_w0 = xmask01_w0;\n+\t\t\txmask23_w1 = xmask01_w1;\n \n \t\t\t/* Move mbufs to iova */\n \t\t\tmbuf0 = (uint64_t *)tx_pkts[0];\n@@ -1681,35 +1804,63 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tmbuf2 = (uint64_t *)tx_pkts[2];\n \t\t\tmbuf3 = (uint64_t *)tx_pkts[3];\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))\n-\t\t\t\txmask01 = vsetq_lane_u64(0x80000, xmask01, 0);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf0, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 0);\n+\t\t\t\txmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 0);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf0)->pool,\n \t\t\t\t\t(void **)&mbuf0, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))\n-\t\t\t\txmask01 = vsetq_lane_u64(0x80000, xmask01, 1);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf1, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask01_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask01_w0, 1);\n+\t\t\t\txmask01_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask01_w1, 1);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf1)->pool,\n \t\t\t\t\t(void **)&mbuf1, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))\n-\t\t\t\txmask23 = vsetq_lane_u64(0x80000, xmask23, 0);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf2, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 0);\n+\t\t\t\txmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 0);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf2)->pool,\n \t\t\t\t\t(void **)&mbuf2, 1, 0);\n+\t\t\t}\n \n-\t\t\tif (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))\n-\t\t\t\txmask23 = vsetq_lane_u64(0x80000, xmask23, 1);\n-\t\t\telse\n+\t\t\tsend_hdr.w0.u = 0;\n+\t\t\tsend_hdr.w1.u = 0;\n+\n+\t\t\tif (cn9k_nix_prefree_seg((struct rte_mbuf *)mbuf3, txq, &send_hdr)) {\n+\t\t\t\tsend_hdr.w0.df = 1;\n+\t\t\t\txmask23_w0 = vsetq_lane_u64(send_hdr.w0.u, xmask23_w0, 1);\n+\t\t\t\txmask23_w1 = vsetq_lane_u64(send_hdr.w1.u, xmask23_w1, 1);\n+\t\t\t} else {\n \t\t\t\tRTE_MEMPOOL_CHECK_COOKIES(\n \t\t\t\t\t((struct rte_mbuf *)mbuf3)->pool,\n \t\t\t\t\t(void **)&mbuf3, 1, 0);\n-\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);\n-\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, 
xmask23);\n+\t\t\t}\n+\n+\t\t\tsenddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01_w0);\n+\t\t\tsenddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23_w0);\n+\t\t\tsenddesc01_w1 = vorrq_u64(senddesc01_w1, xmask01_w1);\n+\t\t\tsenddesc23_w1 = vorrq_u64(senddesc23_w1, xmask23_w1);\n+\n \t\t\t/* Ensuring mbuf fields which got updated in\n \t\t\t * cnxk_nix_prefree_seg are written before LMTST.\n \t\t\t */\n@@ -1769,7 +1920,8 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,\n \n \t\t\t/* Build mseg list for each packet individually. */\n \t\t\tfor (j = 0; j < NIX_DESCS_PER_LOOP; j++)\n-\t\t\t\tsegdw[j] = cn9k_nix_prepare_mseg_vec(tx_pkts[j],\n+\t\t\t\tsegdw[j] = cn9k_nix_prepare_mseg_vec(txq,\n+\t\t\t\t\t\t\ttx_pkts[j],\n \t\t\t\t\t\t\tseg_list[j], &cmd0[j],\n \t\t\t\t\t\t\t&cmd1[j], flags);\n \t\t\tsegdw[4] = 8;\ndiff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c\nindex 104aad7b51..1be2e9e776 100644\n--- a/drivers/net/cnxk/cnxk_ethdev.c\n+++ b/drivers/net/cnxk/cnxk_ethdev.c\n@@ -455,7 +455,9 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n {\n \tstruct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);\n \tconst struct eth_dev_ops *dev_ops = eth_dev->dev_ops;\n+\tstruct roc_nix *nix = &dev->nix;\n \tstruct cnxk_eth_txq_sp *txq_sp;\n+\tstruct roc_nix_cq *cq;\n \tstruct roc_nix_sq *sq;\n \tsize_t txq_sz;\n \tint rc;\n@@ -480,6 +482,19 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \tsq->max_sqe_sz = nix_sq_max_sqe_sz(dev);\n \tsq->tc = ROC_NIX_PFC_CLASS_INVALID;\n \n+\tif (nix->tx_compl_ena) {\n+\t\tsq->cqid = sq->qid + dev->nb_rxq;\n+\t\tsq->cq_ena = 1;\n+\t\tcq = &dev->cqs[sq->cqid];\n+\t\tcq->qid = sq->cqid;\n+\t\tcq->nb_desc = nb_desc;\n+\t\trc = roc_nix_cq_init(&dev->nix, cq);\n+\t\tif (rc) {\n+\t\t\tplt_err(\"Failed to init cq=%d, rc=%d\", cq->qid, rc);\n+\t\t\treturn rc;\n+\t\t}\n+\t}\n+\n \trc = roc_nix_sq_init(&dev->nix, sq);\n \tif (rc) {\n \t\tplt_err(\"Failed to init sq=%d, rc=%d\", qid, rc);\n@@ -513,7 +528,7 @@ cnxk_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \treturn 0;\n }\n \n-static void\n+void\n cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid)\n {\n \tvoid *txq = eth_dev->data->tx_queues[qid];\n@@ -1234,7 +1249,7 @@ cnxk_nix_configure(struct rte_eth_dev *eth_dev)\n \tif (roc_nix_is_lbk(nix))\n \t\tnix->enable_loop = eth_dev->data->dev_conf.lpbk_mode;\n \n-\tnix->tx_compl_ena = 0;\n+\tnix->tx_compl_ena = dev->tx_compl_ena;\n \n \t/* Alloc a nix lf */\n \trc = roc_nix_lf_alloc(nix, nb_rxq, nb_txq, rx_cfg);\n@@ -1277,6 +1292,15 @@ cnxk_nix_configure(struct rte_eth_dev *eth_dev)\n \t\t\tgoto free_nix_lf;\n \t\t}\n \t\tdev->sqs = qs;\n+\n+\t\tif (nix->tx_compl_ena) {\n+\t\t\tqs = plt_zmalloc(sizeof(struct roc_nix_cq) * nb_txq, 0);\n+\t\t\tif (!qs) {\n+\t\t\t\tplt_err(\"Failed to alloc cqs\");\n+\t\t\t\tgoto free_nix_lf;\n+\t\t\t}\n+\t\t\tdev->cqs = qs;\n+\t\t}\n \t}\n \n \t/* Re-enable NIX LF error interrupts */\ndiff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h\nindex a86e9dba80..4ba40e52b3 100644\n--- a/drivers/net/cnxk/cnxk_ethdev.h\n+++ b/drivers/net/cnxk/cnxk_ethdev.h\n@@ -152,6 +152,21 @@\n \n #define CNXK_TX_MARK_FMT_MASK (0xFFFFFFFFFFFFull)\n \n+struct cnxk_eth_txq_comp {\n+\tuintptr_t desc_base;\n+\tuintptr_t cq_door;\n+\tint64_t *cq_status;\n+\tuint64_t wdata;\n+\tuint32_t head;\n+\tuint32_t qmask;\n+\tuint32_t nb_desc_mask;\n+\tuint32_t available;\n+\tuint32_t sqe_id;\n+\tbool ena;\n+\tstruct rte_mbuf 
**ptr;\n+\trte_spinlock_t ext_buf_lock;\n+};\n+\n struct cnxk_fc_cfg {\n \tenum rte_eth_fc_mode mode;\n \tuint8_t rx_pause;\n@@ -366,6 +381,7 @@ struct cnxk_eth_dev {\n \tuint16_t flags;\n \tuint8_t ptype_disable;\n \tbool scalar_ena;\n+\tbool tx_compl_ena;\n \tbool tx_mark;\n \tbool ptp_en;\n \tbool rx_mark_update; /* Enable/Disable mark update to mbuf */\n@@ -544,6 +560,7 @@ int cnxk_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,\n \t\t\t    const struct rte_eth_rxconf *rx_conf,\n \t\t\t    struct rte_mempool *mp);\n int cnxk_nix_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t qid);\n+void cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid);\n int cnxk_nix_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t qid);\n int cnxk_nix_dev_start(struct rte_eth_dev *eth_dev);\n int cnxk_nix_timesync_enable(struct rte_eth_dev *eth_dev);\ndiff --git a/drivers/net/cnxk/cnxk_ethdev_devargs.c b/drivers/net/cnxk/cnxk_ethdev_devargs.c\nindex d28509dbda..dbf5bd847d 100644\n--- a/drivers/net/cnxk/cnxk_ethdev_devargs.c\n+++ b/drivers/net/cnxk/cnxk_ethdev_devargs.c\n@@ -231,6 +231,7 @@ parse_sdp_channel_mask(const char *key, const char *value, void *extra_args)\n \n #define CNXK_RSS_RETA_SIZE\t\"reta_size\"\n #define CNXK_SCL_ENABLE\t\t\"scalar_enable\"\n+#define CNXK_TX_COMPL_ENA       \"tx_compl_ena\"\n #define CNXK_MAX_SQB_COUNT\t\"max_sqb_count\"\n #define CNXK_FLOW_PREALLOC_SIZE \"flow_prealloc_size\"\n #define CNXK_FLOW_MAX_PRIORITY\t\"flow_max_priority\"\n@@ -266,6 +267,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)\n \tstruct sdp_channel sdp_chan;\n \tuint16_t rss_tag_as_xor = 0;\n \tuint16_t scalar_enable = 0;\n+\tuint16_t tx_compl_ena = 0;\n \tuint16_t custom_sa_act = 0;\n \tstruct rte_kvargs *kvlist;\n \tuint16_t no_inl_dev = 0;\n@@ -285,6 +287,8 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)\n \t\t\t   &reta_sz);\n \trte_kvargs_process(kvlist, CNXK_SCL_ENABLE, &parse_flag,\n \t\t\t   &scalar_enable);\n+\trte_kvargs_process(kvlist, CNXK_TX_COMPL_ENA, &parse_flag,\n+\t\t\t   &tx_compl_ena);\n \trte_kvargs_process(kvlist, CNXK_MAX_SQB_COUNT, &parse_sqb_count,\n \t\t\t   &sqb_count);\n \trte_kvargs_process(kvlist, CNXK_FLOW_PREALLOC_SIZE,\n@@ -319,6 +323,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)\n \n null_devargs:\n \tdev->scalar_ena = !!scalar_enable;\n+\tdev->tx_compl_ena = !!tx_compl_ena;\n \tdev->inb.no_inl_dev = !!no_inl_dev;\n \tdev->inb.min_spi = ipsec_in_min_spi;\n \tdev->inb.max_spi = ipsec_in_max_spi;\n@@ -349,6 +354,7 @@ cnxk_ethdev_parse_devargs(struct rte_devargs *devargs, struct cnxk_eth_dev *dev)\n RTE_PMD_REGISTER_PARAM_STRING(net_cnxk,\n \t\t\t      CNXK_RSS_RETA_SIZE \"=<64|128|256>\"\n \t\t\t      CNXK_SCL_ENABLE \"=1\"\n+\t\t\t      CNXK_TX_COMPL_ENA \"=1\"\n \t\t\t      CNXK_MAX_SQB_COUNT \"=<8-512>\"\n \t\t\t      CNXK_FLOW_PREALLOC_SIZE \"=<1-32>\"\n \t\t\t      CNXK_FLOW_MAX_PRIORITY \"=<1-32>\"\n",
    "prefixes": [
        "05/11"
    ]
}