get:
Show a patch.

patch:
Partially update a patch (only the fields supplied in the request are changed).

put:
Update a patch (a full update of the writable fields).
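
For reference, a minimal client sketch against this endpoint, assuming the Python requests library and a hypothetical API token (token authentication is only needed for the write methods; GET is anonymous):

import requests

url = "http://patchwork.dpdk.org/api/patches/119022/"
headers = {"Authorization": "Token <your-api-token>"}  # hypothetical token

# get: show the patch
patch = requests.get(url).json()
print(patch["name"], patch["state"])

# patch: partially update the patch, e.g. change its state
requests.patch(url, headers=headers, json={"state": "accepted"})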

GET /api/patches/119022/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 119022,
    "url": "http://patchwork.dpdk.org/api/patches/119022/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20221024131227.1062446-18-junfeng.guo@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20221024131227.1062446-18-junfeng.guo@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20221024131227.1062446-18-junfeng.guo@intel.com",
    "date": "2022-10-24T13:12:26",
    "name": "[v11,17/18] net/idpf: add AVX512 data path for single queue model",
    "commit_ref": null,
    "pull_url": null,
    "state": "changes-requested",
    "archived": true,
    "hash": "0134ac0977ce741bdf8b54e156c88926dafb9eb5",
    "submitter": {
        "id": 1785,
        "url": "http://patchwork.dpdk.org/api/people/1785/?format=api",
        "name": "Junfeng Guo",
        "email": "junfeng.guo@intel.com"
    },
    "delegate": {
        "id": 3961,
        "url": "http://patchwork.dpdk.org/api/users/3961/?format=api",
        "username": "arybchenko",
        "first_name": "Andrew",
        "last_name": "Rybchenko",
        "email": "andrew.rybchenko@oktetlabs.ru"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20221024131227.1062446-18-junfeng.guo@intel.com/mbox/",
    "series": [
        {
            "id": 25386,
            "url": "http://patchwork.dpdk.org/api/series/25386/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=25386",
            "date": "2022-10-24T13:12:09",
            "name": "add support for idpf PMD in DPDK",
            "version": 11,
            "mbox": "http://patchwork.dpdk.org/series/25386/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/119022/comments/",
    "check": "success",
    "checks": "http://patchwork.dpdk.org/api/patches/119022/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id BF7A6A034C;\n\tMon, 24 Oct 2022 15:16:10 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 5BEED42BFA;\n\tMon, 24 Oct 2022 15:14:51 +0200 (CEST)",
            "from mga06.intel.com (mga06b.intel.com [134.134.136.31])\n by mails.dpdk.org (Postfix) with ESMTP id 65ACD427F0\n for <dev@dpdk.org>; Mon, 24 Oct 2022 15:14:46 +0200 (CEST)",
            "from orsmga001.jf.intel.com ([10.7.209.18])\n by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 24 Oct 2022 06:14:45 -0700",
            "from dpdk-jf-ntb-one.sh.intel.com ([10.67.111.104])\n by orsmga001.jf.intel.com with ESMTP; 24 Oct 2022 06:14:42 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1666617286; x=1698153286;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=NteOEebsOBCh22LdS2wpSQ1pmNg5yCYcqngTUdTKLTI=;\n b=VB3JjTaxcexYCDAWDoji+uWWcao/0clV4g1bcswYwSFiYd3hO5W64Z6z\n uMtXFCEu4hZXkgzaU7wpidP8yw5q7/dk29TlgSY1yLwsqG92MlBt+wlr8\n 6rIyEHq+ynvBWZf/CoB06h7vfxbyg/os5DuXM5KzdFgjbgLTYk6dtivcS\n tP9An66QvxMYLhpE6Kmww3STMLArwYDmzUTjJ6JwJ0xJyhJ92C5MXEzIr\n zEfimxjCqjP8+gW4My8DTRf6QupfZ9Br4mG8Ul82P9638YfJOzIceWTdt\n kzCoaQqtPYYnO0mbP16cfTWsFL3OvitDOk99XQglMXWWtfswkjp9Vz9rG Q==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6500,9779,10510\"; a=\"369480233\"",
            "E=Sophos;i=\"5.95,209,1661842800\"; d=\"scan'208\";a=\"369480233\"",
            "E=McAfee;i=\"6500,9779,10510\"; a=\"664540089\"",
            "E=Sophos;i=\"5.95,209,1661842800\"; d=\"scan'208\";a=\"664540089\""
        ],
        "X-ExtLoop1": "1",
        "From": "Junfeng Guo <junfeng.guo@intel.com>",
        "To": "andrew.rybchenko@oktetlabs.ru, qi.z.zhang@intel.com,\n jingjing.wu@intel.com,\n beilei.xing@intel.com",
        "Cc": "dev@dpdk.org, Junfeng Guo <junfeng.guo@intel.com>,\n Wenjun Wu <wenjun1.wu@intel.com>",
        "Subject": "[PATCH v11 17/18] net/idpf: add AVX512 data path for single queue\n model",
        "Date": "Mon, 24 Oct 2022 21:12:26 +0800",
        "Message-Id": "<20221024131227.1062446-18-junfeng.guo@intel.com>",
        "X-Mailer": "git-send-email 2.34.1",
        "In-Reply-To": "<20221024131227.1062446-1-junfeng.guo@intel.com>",
        "References": "<20221024130134.1046536-2-junfeng.guo@intel.com>\n <20221024131227.1062446-1-junfeng.guo@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Add support of AVX512 vector data path for single queue model.\n\nSigned-off-by: Wenjun Wu <wenjun1.wu@intel.com>\nSigned-off-by: Junfeng Guo <junfeng.guo@intel.com>\n---\n doc/guides/nics/idpf.rst                |  19 +\n drivers/net/idpf/idpf_ethdev.h          |   5 +\n drivers/net/idpf/idpf_rxtx.c            | 145 ++++\n drivers/net/idpf/idpf_rxtx.h            |  22 +\n drivers/net/idpf/idpf_rxtx_vec_avx512.c | 871 ++++++++++++++++++++++++\n drivers/net/idpf/idpf_rxtx_vec_common.h | 100 +++\n drivers/net/idpf/meson.build            |  28 +\n 7 files changed, 1190 insertions(+)\n create mode 100644 drivers/net/idpf/idpf_rxtx_vec_avx512.c\n create mode 100644 drivers/net/idpf/idpf_rxtx_vec_common.h",
    "diff": "diff --git a/doc/guides/nics/idpf.rst b/doc/guides/nics/idpf.rst\nindex c1001d5d0c..3039c61748 100644\n--- a/doc/guides/nics/idpf.rst\n+++ b/doc/guides/nics/idpf.rst\n@@ -64,3 +64,22 @@ Refer to the document :ref:`compiling and testing a PMD for a NIC <pmd_build_and\n for details.\n \n \n+Features\n+--------\n+\n+Vector PMD\n+~~~~~~~~~~\n+\n+Vector path for RX and TX path are selected automatically. The paths\n+are chosen based on 2 conditions.\n+\n+- ``CPU``\n+  On the X86 platform, the driver checks if the CPU supports AVX512.\n+  If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``\n+  is set to 512, AVX512 paths will be chosen.\n+\n+- ``Offload features``\n+  The supported HW offload features are described in the document idpf.ini,\n+  A value \"P\" means the offload feature is not supported by vector path.\n+  If any not supported features are used, idpf vector PMD is disabled and the\n+  scalar paths are chosen.\ndiff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h\nindex 7fe2647956..2485b3a784 100644\n--- a/drivers/net/idpf/idpf_ethdev.h\n+++ b/drivers/net/idpf/idpf_ethdev.h\n@@ -180,6 +180,11 @@ struct idpf_adapter {\n \tuint32_t ptype_tbl[IDPF_MAX_PKT_TYPE] __rte_cache_min_aligned;\n \n \tbool stopped;\n+\n+\tbool rx_vec_allowed;\n+\tbool tx_vec_allowed;\n+\tbool rx_use_avx512;\n+\tbool tx_use_avx512;\n };\n \n TAILQ_HEAD(idpf_adapter_list, idpf_adapter);\ndiff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c\nindex 8f82cf1b59..abef84b3b0 100644\n--- a/drivers/net/idpf/idpf_rxtx.c\n+++ b/drivers/net/idpf/idpf_rxtx.c\n@@ -4,9 +4,11 @@\n \n #include <ethdev_driver.h>\n #include <rte_net.h>\n+#include <rte_vect.h>\n \n #include \"idpf_ethdev.h\"\n #include \"idpf_rxtx.h\"\n+#include \"idpf_rxtx_vec_common.h\"\n \n const uint32_t *\n idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)\n@@ -271,6 +273,8 @@ reset_single_rx_queue(struct idpf_rx_queue *rxq)\n \n \trxq->pkt_first_seg = NULL;\n \trxq->pkt_last_seg = NULL;\n+\trxq->rxrearm_start = 0;\n+\trxq->rxrearm_nb = 0;\n }\n \n static inline void\n@@ -2118,25 +2122,166 @@ idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,\n \treturn i;\n }\n \n+static void __rte_cold\n+release_rxq_mbufs_vec(struct idpf_rx_queue *rxq)\n+{\n+\tconst uint16_t mask = rxq->nb_rx_desc - 1;\n+\tuint16_t i;\n+\n+\tif (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)\n+\t\treturn;\n+\n+\t/* free all mbufs that are valid in the ring */\n+\tif (rxq->rxrearm_nb == 0) {\n+\t\tfor (i = 0; i < rxq->nb_rx_desc; i++) {\n+\t\t\tif (rxq->sw_ring[i] != NULL)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i]);\n+\t\t}\n+\t} else {\n+\t\tfor (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) {\n+\t\t\tif (rxq->sw_ring[i] != NULL)\n+\t\t\t\trte_pktmbuf_free_seg(rxq->sw_ring[i]);\n+\t\t}\n+\t}\n+\n+\trxq->rxrearm_nb = rxq->nb_rx_desc;\n+\n+\t/* set all entries to NULL */\n+\tmemset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);\n+}\n+\n+static const struct idpf_rxq_ops def_singleq_rx_ops_vec = {\n+\t.release_mbufs = release_rxq_mbufs_vec,\n+};\n+\n+static inline int\n+idpf_singleq_rx_vec_setup_default(struct idpf_rx_queue *rxq)\n+{\n+\tuintptr_t p;\n+\tstruct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */\n+\n+\tmb_def.nb_segs = 1;\n+\tmb_def.data_off = RTE_PKTMBUF_HEADROOM;\n+\tmb_def.port = rxq->port_id;\n+\trte_mbuf_refcnt_set(&mb_def, 1);\n+\n+\t/* prevent compiler reordering: rearm_data covers previous fields 
*/\n+\trte_compiler_barrier();\n+\tp = (uintptr_t)&mb_def.rearm_data;\n+\trxq->mbuf_initializer = *(uint64_t *)p;\n+\treturn 0;\n+}\n+\n+int __rte_cold\n+idpf_singleq_rx_vec_setup(struct idpf_rx_queue *rxq)\n+{\n+\trxq->ops = &def_singleq_rx_ops_vec;\n+\treturn idpf_singleq_rx_vec_setup_default(rxq);\n+}\n+\n void\n idpf_set_rx_function(struct rte_eth_dev *dev)\n {\n \tstruct idpf_vport *vport = dev->data->dev_private;\n+#ifdef RTE_ARCH_X86\n+\tstruct idpf_adapter *ad = vport->adapter;\n+\tstruct idpf_rx_queue *rxq;\n+\tint i;\n+\n+\tif (idpf_rx_vec_dev_check_default(dev) == IDPF_VECTOR_PATH &&\n+\t    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {\n+\t\tad->rx_vec_allowed = true;\n+\n+\t\tif (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)\n+#ifdef CC_AVX512_SUPPORT\n+\t\t\tif (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&\n+\t\t\t    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)\n+\t\t\t\tad->rx_use_avx512 = true;\n+#else\n+\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t    \"AVX512 is not supported in build env\");\n+#endif /* CC_AVX512_SUPPORT */\n+\t} else {\n+\t\tad->rx_vec_allowed = false;\n+\t}\n+#endif /* RTE_ARCH_X86 */\n+\n+#ifdef RTE_ARCH_X86\n+\tif (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {\n+\t\tdev->rx_pkt_burst = idpf_splitq_recv_pkts;\n+\t} else {\n+\t\tif (ad->rx_vec_allowed) {\n+\t\t\tfor (i = 0; i < dev->data->nb_tx_queues; i++) {\n+\t\t\t\trxq = dev->data->rx_queues[i];\n+\t\t\t\t(void)idpf_singleq_rx_vec_setup(rxq);\n+\t\t\t}\n+#ifdef CC_AVX512_SUPPORT\n+\t\t\tif (ad->rx_use_avx512) {\n+\t\t\t\tdev->rx_pkt_burst = idpf_singleq_recv_pkts_avx512;\n+\t\t\t\treturn;\n+\t\t\t}\n+#endif /* CC_AVX512_SUPPORT */\n+\t\t}\n \n+\t\tdev->rx_pkt_burst = idpf_singleq_recv_pkts;\n+\t}\n+#else\n \tif (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)\n \t\tdev->rx_pkt_burst = idpf_splitq_recv_pkts;\n \telse\n \t\tdev->rx_pkt_burst = idpf_singleq_recv_pkts;\n+#endif /* RTE_ARCH_X86 */\n }\n \n void\n idpf_set_tx_function(struct rte_eth_dev *dev)\n {\n \tstruct idpf_vport *vport = dev->data->dev_private;\n+#ifdef RTE_ARCH_X86\n+\tstruct idpf_adapter *ad = vport->adapter;\n+#ifdef CC_AVX512_SUPPORT\n+\tstruct idpf_tx_queue *txq;\n+\tint i;\n+#endif /* CC_AVX512_SUPPORT */\n+\n+\tif (idpf_rx_vec_dev_check_default(dev) == IDPF_VECTOR_PATH &&\n+\t    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {\n+\t\tad->tx_vec_allowed = true;\n+\t\tif (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)\n+#ifdef CC_AVX512_SUPPORT\n+\t\t\tif (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&\n+\t\t\t    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)\n+\t\t\t\tad->tx_use_avx512 = true;\n+#else\n+\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t    \"AVX512 is not supported in build env\");\n+#endif /* CC_AVX512_SUPPORT */\n+\t} else {\n+\t\tad->tx_vec_allowed = false;\n+\t}\n+#endif /* RTE_ARCH_X86 */\n+\n \tif (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {\n \t\tdev->tx_pkt_burst = idpf_splitq_xmit_pkts;\n \t\tdev->tx_pkt_prepare = idpf_prep_pkts;\n \t} else {\n+#ifdef RTE_ARCH_X86\n+\t\tif (ad->tx_vec_allowed) {\n+#ifdef CC_AVX512_SUPPORT\n+\t\t\tif (ad->tx_use_avx512) {\n+\t\t\t\tfor (i = 0; i < dev->data->nb_tx_queues; i++) {\n+\t\t\t\t\ttxq = dev->data->tx_queues[i];\n+\t\t\t\t\tif (txq == NULL)\n+\t\t\t\t\t\tcontinue;\n+\t\t\t\t\tidpf_singleq_tx_vec_setup_avx512(txq);\n+\t\t\t\t}\n+\t\t\t\tdev->tx_pkt_burst = idpf_singleq_xmit_pkts_avx512;\n+\t\t\t\tdev->tx_pkt_prepare = idpf_prep_pkts;\n+\t\t\t\treturn;\n+\t\t\t}\n+#endif /* CC_AVX512_SUPPORT */\n+\t\t}\n+#endif 
/* RTE_ARCH_X86 */\n \t\tdev->tx_pkt_burst = idpf_singleq_xmit_pkts;\n \t\tdev->tx_pkt_prepare = idpf_prep_pkts;\n \t}\ndiff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h\nindex efb2734d85..e808710b41 100644\n--- a/drivers/net/idpf/idpf_rxtx.h\n+++ b/drivers/net/idpf/idpf_rxtx.h\n@@ -18,6 +18,12 @@\n #define IDPF_RX_MAX_BURST\t32\n #define IDPF_DEFAULT_RX_FREE_THRESH\t32\n \n+/* used for Vector PMD */\n+#define IDPF_VPMD_RX_MAX_BURST\t32\n+#define IDPF_VPMD_TX_MAX_BURST\t32\n+#define IDPF_VPMD_DESCS_PER_LOOP\t4\n+#define IDPF_RXQ_REARM_THRESH\t64\n+\n #define IDPF_DEFAULT_TX_RS_THRESH\t32\n #define IDPF_DEFAULT_TX_FREE_THRESH\t32\n \n@@ -52,6 +58,11 @@ struct idpf_rx_queue {\n \tstruct rte_mbuf *pkt_last_seg;\t/* last segment of current packet */\n \tstruct rte_mbuf fake_mbuf;\t/* dummy mbuf */\n \n+\t/* used for VPMD */\n+\tuint16_t rxrearm_nb;       /* number of remaining to be re-armed */\n+\tuint16_t rxrearm_start;    /* the idx we start the re-arming from */\n+\tuint64_t mbuf_initializer; /* value to init mbufs */\n+\n \tuint16_t rx_nb_avail;\n \tuint16_t rx_next_avail;\n \n@@ -82,6 +93,10 @@ struct idpf_tx_entry {\n \tuint16_t last_id;\n };\n \n+struct idpf_tx_vec_entry {\n+\tstruct rte_mbuf *mbuf;\n+};\n+\n /* Structure associated with each TX queue. */\n struct idpf_tx_queue {\n \tconst struct rte_memzone *mz;\t\t/* memzone for Tx ring */\n@@ -166,12 +181,19 @@ uint16_t idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t\tuint16_t nb_pkts);\n uint16_t idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\t\t       uint16_t nb_pkts);\n+uint16_t idpf_singleq_recv_pkts_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t\t       uint16_t nb_pkts);\n uint16_t idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t\tuint16_t nb_pkts);\n uint16_t idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\t       uint16_t nb_pkts);\n+uint16_t idpf_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t\t       uint16_t nb_pkts);\n+int idpf_singleq_tx_vec_setup_avx512(struct idpf_tx_queue *txq);\n uint16_t idpf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n \t\t\tuint16_t nb_pkts);\n+int idpf_singleq_rx_vec_setup(struct idpf_rx_queue *rxq);\n+\n void idpf_stop_queues(struct rte_eth_dev *dev);\n \n void idpf_set_rx_function(struct rte_eth_dev *dev);\ndiff --git a/drivers/net/idpf/idpf_rxtx_vec_avx512.c b/drivers/net/idpf/idpf_rxtx_vec_avx512.c\nnew file mode 100644\nindex 0000000000..2e8b52b795\n--- /dev/null\n+++ b/drivers/net/idpf/idpf_rxtx_vec_avx512.c\n@@ -0,0 +1,871 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#include \"idpf_rxtx_vec_common.h\"\n+\n+#include <rte_vect.h>\n+\n+#ifndef __INTEL_COMPILER\n+#pragma GCC diagnostic ignored \"-Wcast-qual\"\n+#endif\n+\n+#define IDPF_DESCS_PER_LOOP_AVX 8\n+#define PKTLEN_SHIFT 10\n+\n+/******************************************************************************\n+ * If user knows a specific offload is not enabled by APP,\n+ * the macro can be commented to save the effort of fast path.\n+ * Currently below 1 feature is supported in RX path,\n+ * 1, packet type analysis\n+ ******************************************************************************/\n+#define IDPF_RX_PTYPE_OFFLOAD\n+\n+static __rte_always_inline void\n+idpf_singleq_rearm_common(struct idpf_rx_queue *rxq)\n+{\n+\tstruct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];\n+\tvolatile union 
virtchnl2_rx_desc *rxdp = rxq->rx_ring;\n+\tuint16_t rx_id;\n+\tint i;\n+\n+\trxdp += rxq->rxrearm_start;\n+\n+\t/* Pull 'n' more MBUFs into the software ring */\n+\tif (rte_mempool_get_bulk(rxq->mp,\n+\t\t\t\t (void *)rxp,\n+\t\t\t\t IDPF_RXQ_REARM_THRESH) < 0) {\n+\t\tif (rxq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >=\n+\t\t    rxq->nb_rx_desc) {\n+\t\t\t__m128i dma_addr0;\n+\n+\t\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t\tfor (i = 0; i < IDPF_VPMD_DESCS_PER_LOOP; i++) {\n+\t\t\t\trxp[i] = &rxq->fake_mbuf;\n+\t\t\t\t_mm_store_si128((__m128i *)&rxdp[i].read,\n+\t\t\t\t\t\tdma_addr0);\n+\t\t\t}\n+\t\t}\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\tIDPF_RXQ_REARM_THRESH;\n+\t\treturn;\n+\t}\n+\tstruct rte_mbuf *mb0, *mb1, *mb2, *mb3;\n+\tstruct rte_mbuf *mb4, *mb5, *mb6, *mb7;\n+\t__m512i dma_addr0_3, dma_addr4_7;\n+\t__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);\n+\t/* Initialize the mbufs in vector, process 8 mbufs in one loop */\n+\tfor (i = 0; i < IDPF_RXQ_REARM_THRESH;\n+\t\t\ti += 8, rxp += 8, rxdp += 8) {\n+\t\t__m128i vaddr0, vaddr1, vaddr2, vaddr3;\n+\t\t__m128i vaddr4, vaddr5, vaddr6, vaddr7;\n+\t\t__m256i vaddr0_1, vaddr2_3;\n+\t\t__m256i vaddr4_5, vaddr6_7;\n+\t\t__m512i vaddr0_3, vaddr4_7;\n+\n+\t\tmb0 = rxp[0];\n+\t\tmb1 = rxp[1];\n+\t\tmb2 = rxp[2];\n+\t\tmb3 = rxp[3];\n+\t\tmb4 = rxp[4];\n+\t\tmb5 = rxp[5];\n+\t\tmb6 = rxp[6];\n+\t\tmb7 = rxp[7];\n+\n+\t\t/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=\n+\t\t\t\toffsetof(struct rte_mbuf, buf_addr) + 8);\n+\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\t\tvaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);\n+\t\tvaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);\n+\t\tvaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);\n+\t\tvaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);\n+\t\tvaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);\n+\t\tvaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);\n+\n+\t\t/**\n+\t\t * merge 0 & 1, by casting 0 to 256-bit and inserting 1\n+\t\t * into the high lanes. 
Similarly for 2 & 3, and so on.\n+\t\t */\n+\t\tvaddr0_1 =\n+\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),\n+\t\t\t\t\t\tvaddr1, 1);\n+\t\tvaddr2_3 =\n+\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),\n+\t\t\t\t\t\tvaddr3, 1);\n+\t\tvaddr4_5 =\n+\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),\n+\t\t\t\t\t\tvaddr5, 1);\n+\t\tvaddr6_7 =\n+\t\t\t_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),\n+\t\t\t\t\t\tvaddr7, 1);\n+\t\tvaddr0_3 =\n+\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),\n+\t\t\t\t\t\tvaddr2_3, 1);\n+\t\tvaddr4_7 =\n+\t\t\t_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),\n+\t\t\t\t\t\tvaddr6_7, 1);\n+\n+\t\t/* convert pa to dma_addr hdr/data */\n+\t\tdma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);\n+\t\tdma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);\n+\n+\t\t/* add headroom to pa values */\n+\t\tdma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);\n+\t\tdma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);\n+\n+\t\t/* flush desc with pa dma_addr */\n+\t\t_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);\n+\t\t_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);\n+\t}\n+\n+\trxq->rxrearm_start += IDPF_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= IDPF_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tIDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);\n+}\n+\n+static __rte_always_inline void\n+idpf_singleq_rearm(struct idpf_rx_queue *rxq)\n+{\n+\tint i;\n+\tuint16_t rx_id;\n+\tvolatile union virtchnl2_rx_desc *rxdp = rxq->rx_ring;\n+\tstruct rte_mempool_cache *cache =\n+\t\trte_mempool_default_cache(rxq->mp, rte_lcore_id());\n+\tstruct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];\n+\n+\trxdp += rxq->rxrearm_start;\n+\n+\tif (unlikely(cache == NULL))\n+\t\treturn idpf_singleq_rearm_common(rxq);\n+\n+\t/* We need to pull 'n' more MBUFs into the software ring from mempool\n+\t * We inline the mempool function here, so we can vectorize the copy\n+\t * from the cache into the shadow ring.\n+\t */\n+\n+\t/* Can this be satisfied from the cache? */\n+\tif (cache->len < IDPF_RXQ_REARM_THRESH) {\n+\t\t/* No. Backfill the cache first, and then fill from it */\n+\t\tuint32_t req = IDPF_RXQ_REARM_THRESH + (cache->size -\n+\t\t\t\t\t\t\tcache->len);\n+\n+\t\t/* How many do we require i.e. number to fill the cache + the request */\n+\t\tint ret = rte_mempool_ops_dequeue_bulk\n+\t\t\t\t(rxq->mp, &cache->objs[cache->len], req);\n+\t\tif (ret == 0) {\n+\t\t\tcache->len += req;\n+\t\t} else {\n+\t\t\tif (rxq->rxrearm_nb + IDPF_RXQ_REARM_THRESH >=\n+\t\t\t    rxq->nb_rx_desc) {\n+\t\t\t\t__m128i dma_addr0;\n+\n+\t\t\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t\t\tfor (i = 0; i < IDPF_VPMD_DESCS_PER_LOOP; i++) {\n+\t\t\t\t\trxp[i] = &rxq->fake_mbuf;\n+\t\t\t\t\t_mm_storeu_si128((__m128i *)&rxdp[i].read,\n+\t\t\t\t\t\t\t dma_addr0);\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\t\t\tIDPF_RXQ_REARM_THRESH;\n+\t\t\treturn;\n+\t\t}\n+\t}\n+\n+\tconst __m512i iova_offsets =  _mm512_set1_epi64(offsetof\n+\t\t\t\t\t\t\t(struct rte_mbuf, buf_iova));\n+\tconst __m512i headroom = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);\n+\n+\t/* to shuffle the addresses to correct slots. 
Values 4-7 will contain\n+\t * zeros, so use 7 for a zero-value.\n+\t */\n+\tconst __m512i permute_idx = _mm512_set_epi64(7, 7, 3, 1, 7, 7, 2, 0);\n+\n+\t/* Initialize the mbufs in vector, process 8 mbufs in one loop, taking\n+\t * from mempool cache and populating both shadow and HW rings\n+\t */\n+\tfor (i = 0; i < IDPF_RXQ_REARM_THRESH / IDPF_DESCS_PER_LOOP_AVX; i++) {\n+\t\tconst __m512i mbuf_ptrs = _mm512_loadu_si512\n+\t\t\t(&cache->objs[cache->len - IDPF_DESCS_PER_LOOP_AVX]);\n+\t\t_mm512_storeu_si512(rxp, mbuf_ptrs);\n+\n+\t\tconst __m512i iova_base_addrs = _mm512_i64gather_epi64\n+\t\t\t\t(_mm512_add_epi64(mbuf_ptrs, iova_offsets),\n+\t\t\t\t 0, /* base */\n+\t\t\t\t 1  /* scale */);\n+\t\tconst __m512i iova_addrs = _mm512_add_epi64(iova_base_addrs,\n+\t\t\t\theadroom);\n+\t\tconst __m512i iovas0 = _mm512_castsi256_si512\n+\t\t\t\t(_mm512_extracti64x4_epi64(iova_addrs, 0));\n+\t\tconst __m512i iovas1 = _mm512_castsi256_si512\n+\t\t\t\t(_mm512_extracti64x4_epi64(iova_addrs, 1));\n+\n+\t\t/* permute leaves desc 2-3 addresses in header address slots 0-1\n+\t\t * but these are ignored by driver since header split not\n+\t\t * enabled. Similarly for desc 6 & 7.\n+\t\t */\n+\t\tconst __m512i desc0_1 = _mm512_permutexvar_epi64\n+\t\t\t\t(permute_idx,\n+\t\t\t\t iovas0);\n+\t\tconst __m512i desc2_3 = _mm512_bsrli_epi128(desc0_1, 8);\n+\n+\t\tconst __m512i desc4_5 = _mm512_permutexvar_epi64\n+\t\t\t\t(permute_idx,\n+\t\t\t\t iovas1);\n+\t\tconst __m512i desc6_7 = _mm512_bsrli_epi128(desc4_5, 8);\n+\n+\t\t_mm512_storeu_si512((void *)rxdp, desc0_1);\n+\t\t_mm512_storeu_si512((void *)(rxdp + 2), desc2_3);\n+\t\t_mm512_storeu_si512((void *)(rxdp + 4), desc4_5);\n+\t\t_mm512_storeu_si512((void *)(rxdp + 6), desc6_7);\n+\n+\t\trxp += IDPF_DESCS_PER_LOOP_AVX;\n+\t\trxdp += IDPF_DESCS_PER_LOOP_AVX;\n+\t\tcache->len -= IDPF_DESCS_PER_LOOP_AVX;\n+\t}\n+\n+\trxq->rxrearm_start += IDPF_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_rx_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= IDPF_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tIDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);\n+}\n+\n+#define IDPF_RX_LEN_MASK 0x80808080\n+static __rte_always_inline uint16_t\n+_idpf_singleq_recv_raw_pkts_avx512(struct idpf_rx_queue *rxq,\n+\t\t\t\t   struct rte_mbuf **rx_pkts,\n+\t\t\t\t   uint16_t nb_pkts)\n+{\n+#ifdef IDPF_RX_PTYPE_OFFLOAD\n+\tconst uint32_t *type_table = rxq->adapter->ptype_tbl;\n+#endif\n+\n+\tconst __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0,\n+\t\t\t\t\t\t    rxq->mbuf_initializer);\n+\tstruct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];\n+\tvolatile union virtchnl2_rx_desc *rxdp = rxq->rx_ring;\n+\n+\trxdp += rxq->rx_tail;\n+\n+\trte_prefetch0(rxdp);\n+\n+\t/* nb_pkts has to be floor-aligned to IDPF_DESCS_PER_LOOP_AVX */\n+\tnb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IDPF_DESCS_PER_LOOP_AVX);\n+\n+\t/* See if we need to rearm the RX queue - gives the prefetch a bit\n+\t * of time to act\n+\t */\n+\tif (rxq->rxrearm_nb > IDPF_RXQ_REARM_THRESH)\n+\t\tidpf_singleq_rearm(rxq);\n+\n+\t/* Before we start moving massive data around, check to see if\n+\t * there is actually a packet available\n+\t */\n+\tif ((rxdp->flex_nic_wb.status_error0  &\n+\t      rte_cpu_to_le_32(1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0)\n+\t\treturn 0;\n+\n+\t/* 8 packets DD mask, LSB in each 32-bit value */\n+\tconst __m256i dd_check = 
_mm256_set1_epi32(1);\n+\n+\t/* mask to shuffle from desc. to mbuf (4 descriptors)*/\n+\tconst __m512i shuf_msk =\n+\t\t_mm512_set_epi32\n+\t\t\t(/* 1st descriptor */\n+\t\t\t 0xFFFFFFFF,    /* rss set as unknown */\n+\t\t\t 0xFFFF0504,    /* vlan_macip set as unknown */\n+\t\t\t\t\t/* octet 15~14, 16 bits data_len */\n+\t\t\t 0xFFFF0504,    /* skip high 16 bits pkt_len, zero out */\n+\t\t\t\t\t/* octet 15~14, low 16 bits pkt_len */\n+\t\t\t 0xFFFFFFFF,    /* pkt_type set as unknown */\n+\t\t\t /* 2nd descriptor */\n+\t\t\t 0xFFFFFFFF,    /* rss set as unknown */\n+\t\t\t 0xFFFF0504,    /* vlan_macip set as unknown */\n+\t\t\t\t\t/* octet 15~14, 16 bits data_len */\n+\t\t\t 0xFFFF0504,    /* skip high 16 bits pkt_len, zero out */\n+\t\t\t\t\t/* octet 15~14, low 16 bits pkt_len */\n+\t\t\t 0xFFFFFFFF,    /* pkt_type set as unknown */\n+\t\t\t /* 3rd descriptor */\n+\t\t\t 0xFFFFFFFF,    /* rss set as unknown */\n+\t\t\t 0xFFFF0504,    /* vlan_macip set as unknown */\n+\t\t\t\t\t/* octet 15~14, 16 bits data_len */\n+\t\t\t 0xFFFF0504,    /* skip high 16 bits pkt_len, zero out */\n+\t\t\t\t\t/* octet 15~14, low 16 bits pkt_len */\n+\t\t\t 0xFFFFFFFF,    /* pkt_type set as unknown */\n+\t\t\t /* 4th descriptor */\n+\t\t\t 0xFFFFFFFF,    /* rss set as unknown */\n+\t\t\t 0xFFFF0504,    /* vlan_macip set as unknown */\n+\t\t\t\t\t/* octet 15~14, 16 bits data_len */\n+\t\t\t 0xFFFF0504,    /* skip high 16 bits pkt_len, zero out */\n+\t\t\t\t\t/* octet 15~14, low 16 bits pkt_len */\n+\t\t\t 0xFFFFFFFF     /* pkt_type set as unknown */\n+\t\t\t);\n+\t/**\n+\t * compile-time check the shuffle layout is correct.\n+\t * NOTE: the first field (lowest address) is given last in set_epi\n+\t * calls above.\n+\t */\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);\n+\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=\n+\t\t\t offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);\n+\n+\tuint16_t i, received;\n+\n+\tfor (i = 0, received = 0; i < nb_pkts;\n+\t     i += IDPF_DESCS_PER_LOOP_AVX,\n+\t     rxdp += IDPF_DESCS_PER_LOOP_AVX) {\n+\t\t/* step 1, copy over 8 mbuf pointers to rx_pkts array */\n+\t\t_mm256_storeu_si256((void *)&rx_pkts[i],\n+\t\t\t\t    _mm256_loadu_si256((void *)&sw_ring[i]));\n+#ifdef RTE_ARCH_X86_64\n+\t\t_mm256_storeu_si256\n+\t\t\t((void *)&rx_pkts[i + 4],\n+\t\t\t _mm256_loadu_si256((void *)&sw_ring[i + 4]));\n+#endif\n+\n+\t\t__m512i raw_desc0_3, raw_desc4_7;\n+\t\tconst __m128i raw_desc7 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 7));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc6 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 6));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc5 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 5));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc4 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 4));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc3 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 3));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc2 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 2));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc1 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 1));\n+\t\trte_compiler_barrier();\n+\t\tconst __m128i raw_desc0 =\n+\t\t\t_mm_load_si128((void *)(rxdp + 
0));\n+\n+\t\traw_desc4_7 = _mm512_broadcast_i32x4(raw_desc4);\n+\t\traw_desc4_7 = _mm512_inserti32x4(raw_desc4_7, raw_desc5, 1);\n+\t\traw_desc4_7 = _mm512_inserti32x4(raw_desc4_7, raw_desc6, 2);\n+\t\traw_desc4_7 = _mm512_inserti32x4(raw_desc4_7, raw_desc7, 3);\n+\t\traw_desc0_3 = _mm512_broadcast_i32x4(raw_desc0);\n+\t\traw_desc0_3 = _mm512_inserti32x4(raw_desc0_3, raw_desc1, 1);\n+\t\traw_desc0_3 = _mm512_inserti32x4(raw_desc0_3, raw_desc2, 2);\n+\t\traw_desc0_3 = _mm512_inserti32x4(raw_desc0_3, raw_desc3, 3);\n+\n+\t\t/**\n+\t\t * convert descriptors 4-7 into mbufs, adjusting length and\n+\t\t * re-arranging fields. Then write into the mbuf\n+\t\t */\n+\t\tconst __m512i len4_7 = _mm512_slli_epi32(raw_desc4_7,\n+\t\t\t\t\t\t\t PKTLEN_SHIFT);\n+\t\tconst __m512i desc4_7 = _mm512_mask_blend_epi16(IDPF_RX_LEN_MASK,\n+\t\t\t\t\t\t\t\traw_desc4_7,\n+\t\t\t\t\t\t\t\tlen4_7);\n+\t\t__m512i mb4_7 = _mm512_shuffle_epi8(desc4_7, shuf_msk);\n+\n+#ifdef IDPF_RX_PTYPE_OFFLOAD\n+\t\t/**\n+\t\t * to get packet types, shift 64-bit values down 30 bits\n+\t\t * and so ptype is in lower 8-bits in each\n+\t\t */\n+\t\tconst __m512i ptypes4_7 = _mm512_srli_epi64(desc4_7, 16);\n+\t\tconst __m256i ptypes6_7 = _mm512_extracti64x4_epi64(ptypes4_7, 1);\n+\t\tconst __m256i ptypes4_5 = _mm512_extracti64x4_epi64(ptypes4_7, 0);\n+\t\tconst uint8_t ptype7 = _mm256_extract_epi8(ptypes6_7, 16);\n+\t\tconst uint8_t ptype6 = _mm256_extract_epi8(ptypes6_7, 0);\n+\t\tconst uint8_t ptype5 = _mm256_extract_epi8(ptypes4_5, 16);\n+\t\tconst uint8_t ptype4 = _mm256_extract_epi8(ptypes4_5, 0);\n+\n+\t\tconst __m512i ptype4_7 = _mm512_set_epi32\n+\t\t\t(0, 0, 0, type_table[ptype7],\n+\t\t\t 0, 0, 0, type_table[ptype6],\n+\t\t\t 0, 0, 0, type_table[ptype5],\n+\t\t\t 0, 0, 0, type_table[ptype4]);\n+\t\tmb4_7 = _mm512_mask_blend_epi32(0x1111, mb4_7, ptype4_7);\n+#endif\n+\n+\t\t/**\n+\t\t * convert descriptors 0-3 into mbufs, adjusting length and\n+\t\t * re-arranging fields. 
Then write into the mbuf\n+\t\t */\n+\t\tconst __m512i len0_3 = _mm512_slli_epi32(raw_desc0_3,\n+\t\t\t\t\t\t\t PKTLEN_SHIFT);\n+\t\tconst __m512i desc0_3 = _mm512_mask_blend_epi16(IDPF_RX_LEN_MASK,\n+\t\t\t\t\t\t\t\traw_desc0_3,\n+\t\t\t\t\t\t\t\tlen0_3);\n+\t\t__m512i mb0_3 = _mm512_shuffle_epi8(desc0_3, shuf_msk);\n+\n+#ifdef IDPF_RX_PTYPE_OFFLOAD\n+\t\t/* get the packet types */\n+\t\tconst __m512i ptypes0_3 = _mm512_srli_epi64(desc0_3, 16);\n+\t\tconst __m256i ptypes2_3 = _mm512_extracti64x4_epi64(ptypes0_3, 1);\n+\t\tconst __m256i ptypes0_1 = _mm512_extracti64x4_epi64(ptypes0_3, 0);\n+\t\tconst uint8_t ptype3 = _mm256_extract_epi8(ptypes2_3, 16);\n+\t\tconst uint8_t ptype2 = _mm256_extract_epi8(ptypes2_3, 0);\n+\t\tconst uint8_t ptype1 = _mm256_extract_epi8(ptypes0_1, 16);\n+\t\tconst uint8_t ptype0 = _mm256_extract_epi8(ptypes0_1, 0);\n+\n+\t\tconst __m512i ptype0_3 = _mm512_set_epi32\n+\t\t\t(0, 0, 0, type_table[ptype3],\n+\t\t\t 0, 0, 0, type_table[ptype2],\n+\t\t\t 0, 0, 0, type_table[ptype1],\n+\t\t\t 0, 0, 0, type_table[ptype0]);\n+\t\tmb0_3 = _mm512_mask_blend_epi32(0x1111, mb0_3, ptype0_3);\n+#endif\n+\n+\t\t/**\n+\t\t * use permute/extract to get status content\n+\t\t * After the operations, the packets status flags are in the\n+\t\t * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]\n+\t\t */\n+\t\t/* merge the status bits into one register */\n+\t\tconst __m512i status_permute_msk = _mm512_set_epi32\n+\t\t\t(0, 0, 0, 0,\n+\t\t\t 0, 0, 0, 0,\n+\t\t\t 22, 30, 6, 14,\n+\t\t\t 18, 26, 2, 10);\n+\t\tconst __m512i raw_status0_7 = _mm512_permutex2var_epi32\n+\t\t\t(raw_desc4_7, status_permute_msk, raw_desc0_3);\n+\t\t__m256i status0_7 = _mm512_extracti64x4_epi64\n+\t\t\t(raw_status0_7, 0);\n+\n+\t\t/* now do flag manipulation */\n+\n+\t\t/**\n+\t\t * At this point, we have the 8 sets of flags in the low 16-bits\n+\t\t * of each 32-bit value.\n+\t\t * We want to extract these, and merge them with the mbuf init\n+\t\t * data so we can do a single write to the mbuf to set the flags\n+\t\t * and all the other initialization fields. Extracting the\n+\t\t * appropriate flags means that we have to do a shift and blend\n+\t\t * for each mbuf before we do the write. 
However, we can also\n+\t\t * add in the previously computed rx_descriptor fields to\n+\t\t * make a single 256-bit write per mbuf\n+\t\t */\n+\t\t/* check the structure matches expectations */\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=\n+\t\t\t\t offsetof(struct rte_mbuf, rearm_data) + 8);\n+\t\tRTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=\n+\t\t\t\t RTE_ALIGN(offsetof(struct rte_mbuf,\n+\t\t\t\t\t\t    rearm_data),\n+\t\t\t\t\t\t    16));\n+\t\t/* build up data and do writes */\n+\t\t__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,\n+\t\t\trearm6, rearm7;\n+\t\tconst __m256i mb4_5 = _mm512_extracti64x4_epi64(mb4_7, 0);\n+\t\tconst __m256i mb6_7 = _mm512_extracti64x4_epi64(mb4_7, 1);\n+\t\tconst __m256i mb0_1 = _mm512_extracti64x4_epi64(mb0_3, 0);\n+\t\tconst __m256i mb2_3 = _mm512_extracti64x4_epi64(mb0_3, 1);\n+\n+\t\trearm6 = _mm256_permute2f128_si256(mbuf_init, mb6_7, 0x20);\n+\t\trearm4 = _mm256_permute2f128_si256(mbuf_init, mb4_5, 0x20);\n+\t\trearm2 = _mm256_permute2f128_si256(mbuf_init, mb2_3, 0x20);\n+\t\trearm0 = _mm256_permute2f128_si256(mbuf_init, mb0_1, 0x20);\n+\n+\t\t/* write to mbuf */\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,\n+\t\t\t\t    rearm6);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,\n+\t\t\t\t    rearm4);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,\n+\t\t\t\t    rearm2);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,\n+\t\t\t\t    rearm0);\n+\n+\t\trearm7 = _mm256_blend_epi32(mbuf_init, mb6_7, 0xF0);\n+\t\trearm5 = _mm256_blend_epi32(mbuf_init, mb4_5, 0xF0);\n+\t\trearm3 = _mm256_blend_epi32(mbuf_init, mb2_3, 0xF0);\n+\t\trearm1 = _mm256_blend_epi32(mbuf_init, mb0_1, 0xF0);\n+\n+\t\t/* again write to mbufs */\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,\n+\t\t\t\t    rearm7);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,\n+\t\t\t\t    rearm5);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,\n+\t\t\t\t    rearm3);\n+\t\t_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,\n+\t\t\t\t    rearm1);\n+\n+\t\t/* perform dd_check */\n+\t\tstatus0_7 = _mm256_and_si256(status0_7, dd_check);\n+\t\tstatus0_7 = _mm256_packs_epi32(status0_7,\n+\t\t\t\t\t       _mm256_setzero_si256());\n+\n+\t\tuint64_t burst = __builtin_popcountll\n+\t\t\t\t\t(_mm_cvtsi128_si64\n+\t\t\t\t\t\t(_mm256_extracti128_si256\n+\t\t\t\t\t\t\t(status0_7, 1)));\n+\t\tburst += __builtin_popcountll\n+\t\t\t\t(_mm_cvtsi128_si64\n+\t\t\t\t\t(_mm256_castsi256_si128(status0_7)));\n+\t\treceived += burst;\n+\t\tif (burst != IDPF_DESCS_PER_LOOP_AVX)\n+\t\t\tbreak;\n+\t}\n+\n+\t/* update tail pointers */\n+\trxq->rx_tail += received;\n+\trxq->rx_tail &= (rxq->nb_rx_desc - 1);\n+\tif ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep aligned */\n+\t\trxq->rx_tail--;\n+\t\treceived--;\n+\t}\n+\trxq->rxrearm_nb += received;\n+\treturn received;\n+}\n+\n+/**\n+ * Notice:\n+ * - nb_pkts < IDPF_DESCS_PER_LOOP, just return no packet\n+ */\n+uint16_t\n+idpf_singleq_recv_pkts_avx512(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\t\t  uint16_t nb_pkts)\n+{\n+\treturn _idpf_singleq_recv_raw_pkts_avx512(rx_queue, rx_pkts, nb_pkts);\n+}\n+\n+static __rte_always_inline int\n+idpf_tx_free_bufs_avx512(struct idpf_tx_queue *txq)\n+{\n+\tstruct idpf_tx_vec_entry *txep;\n+\tuint32_t n;\n+\tuint32_t i;\n+\tint nb_free = 0;\n+\tstruct rte_mbuf *m, *free[txq->rs_thresh];\n+\n+\t/* check DD bits on threshold descriptor */\n+\tif 
((txq->tx_ring[txq->next_dd].qw1.cmd_dtype &\n+\t\t\trte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=\n+\t\t\trte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))\n+\t\treturn 0;\n+\n+\tn = txq->rs_thresh;\n+\n+\t /* first buffer to free from S/W ring is at index\n+\t  * tx_next_dd - (tx_rs_thresh-1)\n+\t  */\n+\ttxep = (void *)txq->sw_ring;\n+\ttxep += txq->next_dd - (n - 1);\n+\n+\tif (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {\n+\t\tstruct rte_mempool *mp = txep[0].mbuf->pool;\n+\t\tstruct rte_mempool_cache *cache = rte_mempool_default_cache(mp,\n+\t\t\t\t\t\t\t\trte_lcore_id());\n+\t\tvoid **cache_objs;\n+\n+\t\tif (cache == NULL || cache->len == 0)\n+\t\t\tgoto normal;\n+\n+\t\tcache_objs = &cache->objs[cache->len];\n+\n+\t\tif (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {\n+\t\t\trte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);\n+\t\t\tgoto done;\n+\t\t}\n+\n+\t\t/* The cache follows the following algorithm\n+\t\t *   1. Add the objects to the cache\n+\t\t *   2. Anything greater than the cache min value (if it crosses the\n+\t\t *   cache flush threshold) is flushed to the ring.\n+\t\t */\n+\t\t/* Add elements back into the cache */\n+\t\tuint32_t copied = 0;\n+\t\t/* n is multiple of 32 */\n+\t\twhile (copied < n) {\n+\t\t\tconst __m512i a = _mm512_loadu_si512(&txep[copied]);\n+\t\t\tconst __m512i b = _mm512_loadu_si512(&txep[copied + 8]);\n+\t\t\tconst __m512i c = _mm512_loadu_si512(&txep[copied + 16]);\n+\t\t\tconst __m512i d = _mm512_loadu_si512(&txep[copied + 24]);\n+\n+\t\t\t_mm512_storeu_si512(&cache_objs[copied], a);\n+\t\t\t_mm512_storeu_si512(&cache_objs[copied + 8], b);\n+\t\t\t_mm512_storeu_si512(&cache_objs[copied + 16], c);\n+\t\t\t_mm512_storeu_si512(&cache_objs[copied + 24], d);\n+\t\t\tcopied += 32;\n+\t\t}\n+\t\tcache->len += n;\n+\n+\t\tif (cache->len >= cache->flushthresh) {\n+\t\t\trte_mempool_ops_enqueue_bulk(mp,\n+\t\t\t\t\t\t     &cache->objs[cache->size],\n+\t\t\t\t\t\t     cache->len - cache->size);\n+\t\t\tcache->len = cache->size;\n+\t\t}\n+\t\tgoto done;\n+\t}\n+\n+normal:\n+\tm = rte_pktmbuf_prefree_seg(txep[0].mbuf);\n+\tif (likely(m != NULL)) {\n+\t\tfree[0] = m;\n+\t\tnb_free = 1;\n+\t\tfor (i = 1; i < n; i++) {\n+\t\t\tm = rte_pktmbuf_prefree_seg(txep[i].mbuf);\n+\t\t\tif (likely(m != NULL)) {\n+\t\t\t\tif (likely(m->pool == free[0]->pool)) {\n+\t\t\t\t\tfree[nb_free++] = m;\n+\t\t\t\t} else {\n+\t\t\t\t\trte_mempool_put_bulk(free[0]->pool,\n+\t\t\t\t\t\t\t     (void *)free,\n+\t\t\t\t\t\t\t     nb_free);\n+\t\t\t\t\tfree[0] = m;\n+\t\t\t\t\tnb_free = 1;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\trte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);\n+\t} else {\n+\t\tfor (i = 1; i < n; i++) {\n+\t\t\tm = rte_pktmbuf_prefree_seg(txep[i].mbuf);\n+\t\t\tif (m != NULL)\n+\t\t\t\trte_mempool_put(m->pool, m);\n+\t\t}\n+\t}\n+\n+done:\n+\t/* buffers were freed, update counters */\n+\ttxq->nb_free = (uint16_t)(txq->nb_free + txq->rs_thresh);\n+\ttxq->next_dd = (uint16_t)(txq->next_dd + txq->rs_thresh);\n+\tif (txq->next_dd >= txq->nb_tx_desc)\n+\t\ttxq->next_dd = (uint16_t)(txq->rs_thresh - 1);\n+\n+\treturn txq->rs_thresh;\n+}\n+\n+static __rte_always_inline void\n+tx_backlog_entry_avx512(struct idpf_tx_vec_entry *txep,\n+\t\t\tstruct rte_mbuf **tx_pkts, uint16_t nb_pkts)\n+{\n+\tint i;\n+\n+\tfor (i = 0; i < (int)nb_pkts; ++i)\n+\t\ttxep[i].mbuf = tx_pkts[i];\n+}\n+\n+#define IDPF_FLEX_TXD_QW1_BUF_SZ_S 48\n+static __rte_always_inline void\n+idpf_vtx1(volatile struct idpf_flex_tx_desc *txdp,\n+\t  struct rte_mbuf *pkt, uint64_t 
flags)\n+{\n+\tuint64_t high_qw =\n+\t\t(IDPF_TX_DESC_DTYPE_FLEX_DATA << IDPF_FLEX_TXD_QW1_DTYPE_S |\n+\t\t ((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S) |\n+\t\t ((uint64_t)pkt->data_len << IDPF_FLEX_TXD_QW1_BUF_SZ_S));\n+\n+\t__m128i descriptor = _mm_set_epi64x(high_qw,\n+\t\t\t\t\t    pkt->buf_iova + pkt->data_off);\n+\t_mm_storeu_si128((__m128i *)txdp, descriptor);\n+}\n+\n+#define IDPF_TX_LEN_MASK 0xAA\n+#define IDPF_TX_OFF_MASK 0x55\n+static __rte_always_inline void\n+idpf_vtx(volatile struct idpf_flex_tx_desc *txdp,\n+\t struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)\n+{\n+\tconst uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_FLEX_DATA  |\n+\t\t\t((uint64_t)flags  << IDPF_FLEX_TXD_QW1_CMD_S));\n+\n+\t/* if unaligned on 32-bit boundary, do one to align */\n+\tif (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {\n+\t\tidpf_vtx1(txdp, *pkt, flags);\n+\t\tnb_pkts--, txdp++, pkt++;\n+\t}\n+\n+\t/* do 4 at a time while possible, in bursts */\n+\tfor (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {\n+\t\tuint64_t hi_qw3 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[3]->data_len <<\n+\t\t\t IDPF_FLEX_TXD_QW1_BUF_SZ_S);\n+\t\tuint64_t hi_qw2 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[2]->data_len <<\n+\t\t\t IDPF_FLEX_TXD_QW1_BUF_SZ_S);\n+\t\tuint64_t hi_qw1 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[1]->data_len <<\n+\t\t\t IDPF_FLEX_TXD_QW1_BUF_SZ_S);\n+\t\tuint64_t hi_qw0 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[0]->data_len <<\n+\t\t\t IDPF_FLEX_TXD_QW1_BUF_SZ_S);\n+\n+\t\t__m512i desc0_3 =\n+\t\t\t_mm512_set_epi64\n+\t\t\t\t(hi_qw3,\n+\t\t\t\t pkt[3]->buf_iova + pkt[3]->data_off,\n+\t\t\t\t hi_qw2,\n+\t\t\t\t pkt[2]->buf_iova + pkt[2]->data_off,\n+\t\t\t\t hi_qw1,\n+\t\t\t\t pkt[1]->buf_iova + pkt[1]->data_off,\n+\t\t\t\t hi_qw0,\n+\t\t\t\t pkt[0]->buf_iova + pkt[0]->data_off);\n+\t\t_mm512_storeu_si512((void *)txdp, desc0_3);\n+\t}\n+\n+\t/* do any last ones */\n+\twhile (nb_pkts) {\n+\t\tidpf_vtx1(txdp, *pkt, flags);\n+\t\ttxdp++, pkt++, nb_pkts--;\n+\t}\n+}\n+\n+static __rte_always_inline uint16_t\n+idpf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t\t uint16_t nb_pkts)\n+{\n+\tstruct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;\n+\tvolatile struct idpf_flex_tx_desc *txdp;\n+\tstruct idpf_tx_vec_entry *txep;\n+\tuint16_t n, nb_commit, tx_id;\n+\tuint64_t flags = IDPF_TX_FLEX_DESC_CMD_EOP;\n+\tuint64_t rs = IDPF_TX_FLEX_DESC_CMD_RS | flags;\n+\n+\t/* cross rx_thresh boundary is not allowed */\n+\tnb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);\n+\n+\tif (txq->nb_free < txq->free_thresh)\n+\t\tidpf_tx_free_bufs_avx512(txq);\n+\n+\tnb_pkts = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts);\n+\tnb_commit = nb_pkts;\n+\tif (unlikely(nb_pkts == 0))\n+\t\treturn 0;\n+\n+\ttx_id = txq->tx_tail;\n+\ttxdp = &txq->tx_ring[tx_id];\n+\ttxep = (void *)txq->sw_ring;\n+\ttxep += tx_id;\n+\n+\ttxq->nb_free = (uint16_t)(txq->nb_free - nb_pkts);\n+\n+\tn = (uint16_t)(txq->nb_tx_desc - tx_id);\n+\tif (nb_commit >= n) {\n+\t\ttx_backlog_entry_avx512(txep, tx_pkts, n);\n+\n+\t\tidpf_vtx(txdp, tx_pkts, n - 1, flags);\n+\t\ttx_pkts += (n - 1);\n+\t\ttxdp += (n - 1);\n+\n+\t\tidpf_vtx1(txdp, *tx_pkts++, rs);\n+\n+\t\tnb_commit = (uint16_t)(nb_commit - n);\n+\n+\t\ttx_id = 0;\n+\t\ttxq->next_rs = (uint16_t)(txq->rs_thresh - 1);\n+\n+\t\t/* avoid reach the end of ring */\n+\t\ttxdp = &txq->tx_ring[tx_id];\n+\t\ttxep = (void *)txq->sw_ring;\n+\t\ttxep += tx_id;\n+\t}\n+\n+\ttx_backlog_entry_avx512(txep, tx_pkts, nb_commit);\n+\n+\tidpf_vtx(txdp, tx_pkts, 
nb_commit, flags);\n+\n+\ttx_id = (uint16_t)(tx_id + nb_commit);\n+\tif (tx_id > txq->next_rs) {\n+\t\ttxq->tx_ring[txq->next_rs].qw1.cmd_dtype |=\n+\t\t\trte_cpu_to_le_64(((uint64_t)IDPF_TX_FLEX_DESC_CMD_RS) <<\n+\t\t\t\t\t IDPF_FLEX_TXD_QW1_CMD_S);\n+\t\ttxq->next_rs =\n+\t\t\t(uint16_t)(txq->next_rs + txq->rs_thresh);\n+\t}\n+\n+\ttxq->tx_tail = tx_id;\n+\n+\tIDPF_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);\n+\n+\treturn nb_pkts;\n+}\n+\n+static __rte_always_inline uint16_t\n+idpf_xmit_pkts_vec_avx512_cmn(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t      uint16_t nb_pkts)\n+{\n+\tuint16_t nb_tx = 0;\n+\tstruct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;\n+\n+\twhile (nb_pkts) {\n+\t\tuint16_t ret, num;\n+\n+\t\tnum = (uint16_t)RTE_MIN(nb_pkts, txq->rs_thresh);\n+\t\tret = idpf_xmit_fixed_burst_vec_avx512(tx_queue, &tx_pkts[nb_tx],\n+\t\t\t\t\t\t       num);\n+\t\tnb_tx += ret;\n+\t\tnb_pkts -= ret;\n+\t\tif (ret < num)\n+\t\t\tbreak;\n+\t}\n+\n+\treturn nb_tx;\n+}\n+\n+uint16_t\n+idpf_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t     uint16_t nb_pkts)\n+{\n+\treturn idpf_xmit_pkts_vec_avx512_cmn(tx_queue, tx_pkts, nb_pkts);\n+}\n+\n+static inline void\n+idpf_singleq_tx_release_mbufs_avx512(struct idpf_tx_queue *txq)\n+{\n+\tunsigned int i;\n+\tconst uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);\n+\tstruct idpf_tx_vec_entry *swr = (void *)txq->sw_ring;\n+\n+\tif (txq->sw_ring == NULL || txq->nb_free == max_desc)\n+\t\treturn;\n+\n+\ti = txq->next_dd - txq->rs_thresh + 1;\n+\tif (txq->tx_tail < i) {\n+\t\tfor (; i < txq->nb_tx_desc; i++) {\n+\t\t\trte_pktmbuf_free_seg(swr[i].mbuf);\n+\t\t\tswr[i].mbuf = NULL;\n+\t\t}\n+\t\ti = 0;\n+\t}\n+}\n+\n+static const struct idpf_txq_ops avx512_singleq_tx_vec_ops = {\n+\t.release_mbufs = idpf_singleq_tx_release_mbufs_avx512,\n+};\n+\n+int __rte_cold\n+idpf_singleq_tx_vec_setup_avx512(struct idpf_tx_queue *txq)\n+{\n+\ttxq->ops = &avx512_singleq_tx_vec_ops;\n+\treturn 0;\n+}\ndiff --git a/drivers/net/idpf/idpf_rxtx_vec_common.h b/drivers/net/idpf/idpf_rxtx_vec_common.h\nnew file mode 100644\nindex 0000000000..0f4e10e154\n--- /dev/null\n+++ b/drivers/net/idpf/idpf_rxtx_vec_common.h\n@@ -0,0 +1,100 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2022 Intel Corporation\n+ */\n+\n+#ifndef _IDPF_RXTX_VEC_COMMON_H_\n+#define _IDPF_RXTX_VEC_COMMON_H_\n+#include <stdint.h>\n+#include <ethdev_driver.h>\n+#include <rte_malloc.h>\n+\n+#include \"idpf_ethdev.h\"\n+#include \"idpf_rxtx.h\"\n+\n+#ifndef __INTEL_COMPILER\n+#pragma GCC diagnostic ignored \"-Wcast-qual\"\n+#endif\n+\n+#define IDPF_VECTOR_PATH\t\t0\n+#define ICE_RX_NO_VECTOR_FLAGS (\t\t\\\n+\t\tRTE_ETH_RX_OFFLOAD_IPV4_CKSUM |\t\\\n+\t\tRTE_ETH_RX_OFFLOAD_UDP_CKSUM |\t\\\n+\t\tRTE_ETH_RX_OFFLOAD_TCP_CKSUM |\t\\\n+\t\tRTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |\t\\\n+\t\tRTE_ETH_RX_OFFLOAD_TIMESTAMP)\n+#define ICE_TX_NO_VECTOR_FLAGS (\t\t\\\n+\t\tRTE_ETH_TX_OFFLOAD_TCP_TSO |\t\\\n+\t\tRTE_ETH_TX_OFFLOAD_MULTI_SEGS)\n+\n+static inline int\n+idpf_rx_vec_queue_default(struct idpf_rx_queue *rxq)\n+{\n+\tif (rxq == NULL)\n+\t\treturn -1;\n+\n+\tif (rte_is_power_of_2(rxq->nb_rx_desc) == 0)\n+\t\treturn -1;\n+\n+\tif (rxq->rx_free_thresh < IDPF_VPMD_RX_MAX_BURST)\n+\t\treturn -1;\n+\n+\tif ((rxq->nb_rx_desc % rxq->rx_free_thresh) != 0)\n+\t\treturn -1;\n+\n+\tif ((rxq->offloads & ICE_RX_NO_VECTOR_FLAGS) != 0)\n+\t\treturn -1;\n+\n+\treturn IDPF_VECTOR_PATH;\n+}\n+\n+static inline int\n+idpf_tx_vec_queue_default(struct idpf_tx_queue 
*txq)\n+{\n+\tif (txq == NULL)\n+\t\treturn -1;\n+\n+\tif (txq->rs_thresh < IDPF_VPMD_TX_MAX_BURST ||\n+\t    (txq->rs_thresh & 3) != 0)\n+\t\treturn -1;\n+\n+\tif ((txq->offloads & ICE_TX_NO_VECTOR_FLAGS) != 0)\n+\t\treturn -1;\n+\n+\treturn IDPF_VECTOR_PATH;\n+}\n+\n+static inline int\n+idpf_rx_vec_dev_check_default(struct rte_eth_dev *dev)\n+{\n+\tint i;\n+\tstruct idpf_rx_queue *rxq;\n+\tint ret = 0;\n+\n+\tfor (i = 0; i < dev->data->nb_rx_queues; i++) {\n+\t\trxq = dev->data->rx_queues[i];\n+\t\tret = (idpf_rx_vec_queue_default(rxq));\n+\t\tif (ret < 0)\n+\t\t\treturn -1;\n+\t}\n+\n+\treturn IDPF_VECTOR_PATH;\n+}\n+\n+static inline int\n+idpf_tx_vec_dev_check_default(struct rte_eth_dev *dev)\n+{\n+\tint i;\n+\tstruct idpf_tx_queue *txq;\n+\tint ret = 0;\n+\n+\tfor (i = 0; i < dev->data->nb_tx_queues; i++) {\n+\t\ttxq = dev->data->tx_queues[i];\n+\t\tret = idpf_tx_vec_queue_default(txq);\n+\t\tif (ret < 0)\n+\t\t\treturn -1;\n+\t}\n+\n+\treturn IDPF_VECTOR_PATH;\n+}\n+\n+#endif /*_IDPF_RXTX_VEC_COMMON_H_*/\ndiff --git a/drivers/net/idpf/meson.build b/drivers/net/idpf/meson.build\nindex b632b76656..da99c098ab 100644\n--- a/drivers/net/idpf/meson.build\n+++ b/drivers/net/idpf/meson.build\n@@ -14,3 +14,31 @@ sources = files(\n     'idpf_rxtx.c',\n     'idpf_vchnl.c',\n )\n+\n+if arch_subdir == 'x86'\n+    idpf_avx512_cpu_support = (\n+        cc.get_define('__AVX512F__', args: machine_args) != '' and\n+        cc.get_define('__AVX512BW__', args: machine_args) != ''\n+    )\n+\n+    idpf_avx512_cc_support = (\n+        not machine_args.contains('-mno-avx512f') and\n+        cc.has_argument('-mavx512f') and\n+        cc.has_argument('-mavx512bw')\n+    )\n+\n+    if idpf_avx512_cpu_support == true or idpf_avx512_cc_support == true\n+        cflags += ['-DCC_AVX512_SUPPORT']\n+        avx512_args = [cflags, '-mavx512f', '-mavx512bw']\n+        if cc.has_argument('-march=skylake-avx512')\n+            avx512_args += '-march=skylake-avx512'\n+        endif\n+        idpf_avx512_lib = static_library('idpf_avx512_lib',\n+            'idpf_rxtx_vec_avx512.c',\n+            dependencies: [static_rte_common_idpf, static_rte_ethdev, static_rte_bus_pci,\n+            static_rte_kvargs, static_rte_hash],\n+            include_directories: includes,\n+            c_args: avx512_args)\n+        objs += idpf_avx512_lib.extract_objects('idpf_rxtx_vec_avx512.c')\n+    endif\n+endif\n",
    "prefixes": [
        "v11",
        "17/18"
    ]
}
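
As a usage note, the fields above are enough to drive simple automation. A sketch (again assuming the Python requests library) that reports the review and CI status and downloads the mbox for local review with git am:

import requests

patch = requests.get("http://patchwork.dpdk.org/api/patches/119022/").json()

# e.g. "changes-requested" / "success" for this revision
print(patch["state"], patch["check"])
print(patch["series"][0]["name"], "v%d" % patch["series"][0]["version"])

# "mbox" points at the raw patch email, suitable for `git am`
with open("patch-119022.mbox", "w") as f:
    f.write(requests.get(patch["mbox"]).text)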