get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.
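
As a reference, the record below can also be retrieved programmatically; this is a minimal sketch using libcurl (an assumption — the endpoint only requires a plain GET from any HTTP client, and the "Vary: Accept" header below indicates JSON can be requested via content negotiation). The raw request and response follow the sketch.

/*
 * Sketch only: retrieve this patch record over HTTP with libcurl (an
 * assumed, commonly available client library -- any HTTP client works).
 * Build with: cc fetch_patch.c -lcurl
 */
#include <stdio.h>
#include <curl/curl.h>

int main(void)
{
	struct curl_slist *hdrs = NULL;
	CURLcode res;
	CURL *curl;

	curl_global_init(CURL_GLOBAL_DEFAULT);
	curl = curl_easy_init();
	if (curl == NULL)
		return 1;

	/* Ask for JSON explicitly; the response advertises "Vary: Accept". */
	hdrs = curl_slist_append(hdrs, "Accept: application/json");
	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs);
	curl_easy_setopt(curl, CURLOPT_URL,
			"http://patchwork.dpdk.org/api/patches/93760/");
	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

	/* No write callback is set, so libcurl prints the body to stdout. */
	res = curl_easy_perform(curl);
	if (res != CURLE_OK)
		fprintf(stderr, "request failed: %s\n", curl_easy_strerror(res));

	curl_slist_free_all(hdrs);
	curl_easy_cleanup(curl);
	curl_global_cleanup();
	return res == CURLE_OK ? 0 : 1;
}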

GET /api/patches/93760/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 93760,
    "url": "http://patchwork.dpdk.org/api/patches/93760/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20210602083110.5530-2-yuanx.wang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210602083110.5530-2-yuanx.wang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210602083110.5530-2-yuanx.wang@intel.com",
    "date": "2021-06-02T08:31:10",
    "name": "[1/1] lib/vhost: support async dequeue for split ring",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "ee86500e502f4eaa756673dd87216a8f19489d97",
    "submitter": {
        "id": 2087,
        "url": "http://patchwork.dpdk.org/api/people/2087/?format=api",
        "name": "Wang, YuanX",
        "email": "yuanx.wang@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patchwork.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20210602083110.5530-2-yuanx.wang@intel.com/mbox/",
    "series": [
        {
            "id": 17204,
            "url": "http://patchwork.dpdk.org/api/series/17204/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=17204",
            "date": "2021-06-02T08:31:09",
            "name": "lib/vhost: support async dequeue for split ring",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/17204/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/93760/comments/",
    "check": "fail",
    "checks": "http://patchwork.dpdk.org/api/patches/93760/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 17838A0524;\n\tWed,  2 Jun 2021 10:41:59 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id F009F4069F;\n\tWed,  2 Jun 2021 10:41:58 +0200 (CEST)",
            "from mga12.intel.com (mga12.intel.com [192.55.52.136])\n by mails.dpdk.org (Postfix) with ESMTP id 740E140689\n for <dev@dpdk.org>; Wed,  2 Jun 2021 10:41:57 +0200 (CEST)",
            "from orsmga001.jf.intel.com ([10.7.209.18])\n by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 02 Jun 2021 01:41:56 -0700",
            "from unknown (HELO localhost.localdomain) ([10.240.183.50])\n by orsmga001-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 02 Jun 2021 01:41:52 -0700"
        ],
        "IronPort-SDR": [
            "\n JPLehbZb3URD7IUnlomPXYeE7OoGzhhD7doPOR6+yByKsSlG1zCoqMnxalbnbadVUy2wp2B10u\n sd2wq33DGm0g==",
            "\n tw7RmVLi4vVPxzUZyyW1vE/dLhBhDanCgfxuPNI73Hgb0sM9JULtdAapZMWw56m2gm9HRX2Dj+\n e/xMrTLBzqOA=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,10002\"; a=\"183419305\"",
            "E=Sophos;i=\"5.83,241,1616482800\"; d=\"scan'208\";a=\"183419305\"",
            "E=Sophos;i=\"5.83,241,1616482800\"; d=\"scan'208\";a=\"479625959\""
        ],
        "From": "Yuan Wang <yuanx.wang@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "maxime.coquelin@redhat.com, chenbo.xia@intel.com, cheng1.jiang@intel.com,\n Yuan Wang <yuanx.wang@intel.com>, Wenwu Ma <wenwux.ma@intel.com>,\n Jiayu Hu <jiayu.hu@intel.com>",
        "Date": "Wed,  2 Jun 2021 08:31:10 +0000",
        "Message-Id": "<20210602083110.5530-2-yuanx.wang@intel.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20210602083110.5530-1-yuanx.wang@intel.com>",
        "References": "<20210602083110.5530-1-yuanx.wang@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH 1/1] lib/vhost: support async dequeue for split\n ring",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "This patch implements asynchronous dequeue data path for split ring.\nA new asynchronous dequeue function is introduced. With this function,\nthe application can try to receive packets from the guest with\noffloading large copies to the DMA engine, thus saving precious CPU\ncycles.\n\nSigned-off-by: Wenwu Ma <wenwux.ma@intel.com>\nSigned-off-by: Yuan Wang <yuanx.wang@intel.com>\nSigned-off-by: Jiayu Hu <jiayu.hu@intel.com>\n---\n doc/guides/prog_guide/vhost_lib.rst |  10 +\n examples/vhost/ioat.c               |  30 +-\n examples/vhost/ioat.h               |   3 +\n examples/vhost/main.c               |  60 +--\n lib/vhost/rte_vhost_async.h         |  44 ++-\n lib/vhost/version.map               |   3 +\n lib/vhost/virtio_net.c              | 549 ++++++++++++++++++++++++++++\n 7 files changed, 664 insertions(+), 35 deletions(-)",
    "diff": "diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst\nindex 6b7206bc1d..785ab0fb34 100644\n--- a/doc/guides/prog_guide/vhost_lib.rst\n+++ b/doc/guides/prog_guide/vhost_lib.rst\n@@ -281,6 +281,16 @@ The following is an overview of some key Vhost API functions:\n   Poll enqueue completion status from async data path. Completed packets\n   are returned to applications through ``pkts``.\n \n+* ``rte_vhost_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts, count, nr_inflight)``\n+\n+  Try to receive packets from the guest with offloading large packets\n+  to the DMA engine. Successfully dequeued packets are transfer\n+  completed and returned in ``pkts``. But there may be other packets\n+  that are sent from the guest but being transferred by the DMA engine,\n+  called in-flight packets. This function will return in-flight packets\n+  only after the DMA engine finishes transferring. The amount of\n+  in-flight packets by now is returned in ``nr_inflight``.\n+\n Vhost-user Implementations\n --------------------------\n \ndiff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c\nindex 2a2c2d7202..236306c9c7 100644\n--- a/examples/vhost/ioat.c\n+++ b/examples/vhost/ioat.c\n@@ -17,7 +17,6 @@ struct packet_tracker {\n \tunsigned short next_read;\n \tunsigned short next_write;\n \tunsigned short last_remain;\n-\tunsigned short ioat_space;\n };\n \n struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];\n@@ -61,18 +60,30 @@ open_ioat(const char *value)\n \t\tgoto out;\n \t}\n \twhile (i < args_nr) {\n+\t\tchar *txd, *rxd;\n+\t\tbool is_txd;\n \t\tchar *arg_temp = dma_arg[i];\n \t\tuint8_t sub_nr;\n+\n \t\tsub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');\n \t\tif (sub_nr != 2) {\n \t\t\tret = -1;\n \t\t\tgoto out;\n \t\t}\n \n-\t\tstart = strstr(ptrs[0], \"txd\");\n-\t\tif (start == NULL) {\n+\t\ttxd = strstr(ptrs[0], \"txd\");\n+\t\trxd = strstr(ptrs[0], \"rxd\");\n+\t\tif (txd == NULL && rxd == NULL) {\n \t\t\tret = -1;\n \t\t\tgoto out;\n+\t\t} else if (txd) {\n+\t\t\tis_txd = true;\n+\t\t\tstart = txd;\n+\t\t\tret |= ASYNC_RX_VHOST;\n+\t\t} else {\n+\t\t\tis_txd = false;\n+\t\t\tstart = rxd;\n+\t\t\tret |= ASYNC_TX_VHOST;\n \t\t}\n \n \t\tstart += 3;\n@@ -82,7 +93,8 @@ open_ioat(const char *value)\n \t\t\tgoto out;\n \t\t}\n \n-\t\tvring_id = 0 + VIRTIO_RXQ;\n+\t\tvring_id = is_txd ? 
VIRTIO_RXQ : VIRTIO_TXQ;\n+\n \t\tif (rte_pci_addr_parse(ptrs[1],\n \t\t\t\t&(dma_info + vid)->dmas[vring_id].addr) < 0) {\n \t\t\tret = -1;\n@@ -113,7 +125,6 @@ open_ioat(const char *value)\n \t\t\tgoto out;\n \t\t}\n \t\trte_rawdev_start(dev_id);\n-\t\tcb_tracker[dev_id].ioat_space = IOAT_RING_SIZE - 1;\n \t\tdma_info->nr++;\n \t\ti++;\n \t}\n@@ -128,7 +139,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,\n \t\tstruct rte_vhost_async_status *opaque_data, uint16_t count)\n {\n \tuint32_t i_desc;\n-\tuint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;\n+\tuint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;\n \tstruct rte_vhost_iov_iter *src = NULL;\n \tstruct rte_vhost_iov_iter *dst = NULL;\n \tunsigned long i_seg;\n@@ -140,7 +151,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,\n \t\t\tsrc = descs[i_desc].src;\n \t\t\tdst = descs[i_desc].dst;\n \t\t\ti_seg = 0;\n-\t\t\tif (cb_tracker[dev_id].ioat_space < src->nr_segs)\n+\t\t\tif (rte_ioat_burst_capacity(dev_id) < src->nr_segs)\n \t\t\t\tbreak;\n \t\t\twhile (i_seg < src->nr_segs) {\n \t\t\t\trte_ioat_enqueue_copy(dev_id,\n@@ -155,7 +166,6 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,\n \t\t\t}\n \t\t\twrite &= mask;\n \t\t\tcb_tracker[dev_id].size_track[write] = src->nr_segs;\n-\t\t\tcb_tracker[dev_id].ioat_space -= src->nr_segs;\n \t\t\twrite++;\n \t\t}\n \t} else {\n@@ -181,8 +191,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,\n \t\tunsigned short mask = MAX_ENQUEUED_SIZE - 1;\n \t\tunsigned short i;\n \n-\t\tuint16_t dev_id = dma_bind[vid].dmas[queue_id * 2\n-\t\t\t\t+ VIRTIO_RXQ].dev_id;\n+\t\tuint16_t dev_id = dma_bind[vid].dmas[queue_id].dev_id;\n \t\tn_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump);\n \t\tif (n_seg < 0) {\n \t\t\tRTE_LOG(ERR,\n@@ -194,7 +203,6 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,\n \t\tif (n_seg == 0)\n \t\t\treturn 0;\n \n-\t\tcb_tracker[dev_id].ioat_space += n_seg;\n \t\tn_seg += cb_tracker[dev_id].last_remain;\n \n \t\tread = cb_tracker[dev_id].next_read;\ndiff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h\nindex 1aa28ed6a3..db7acefc02 100644\n--- a/examples/vhost/ioat.h\n+++ b/examples/vhost/ioat.h\n@@ -13,6 +13,9 @@\n #define IOAT_RING_SIZE 4096\n #define MAX_ENQUEUED_SIZE 4096\n \n+#define ASYNC_RX_VHOST\t1\n+#define ASYNC_TX_VHOST\t2\n+\n struct dma_info {\n \tstruct rte_pci_addr addr;\n \tuint16_t dev_id;\ndiff --git a/examples/vhost/main.c b/examples/vhost/main.c\nindex d2179eadb9..a5662a1a91 100644\n--- a/examples/vhost/main.c\n+++ b/examples/vhost/main.c\n@@ -93,7 +93,8 @@ static int client_mode;\n \n static int builtin_net_driver;\n \n-static int async_vhost_driver;\n+static int async_rx_vhost_driver;\n+static int async_tx_vhost_driver;\n \n static char *dma_type;\n \n@@ -671,13 +672,17 @@ us_vhost_parse_args(int argc, char **argv)\n \t\t\tbreak;\n \n \t\tcase OPT_DMAS_NUM:\n-\t\t\tif (open_dma(optarg) == -1) {\n+\t\t\tret = open_dma(optarg);\n+\t\t\tif (ret == -1) {\n \t\t\t\tRTE_LOG(INFO, VHOST_CONFIG,\n \t\t\t\t\t\"Wrong DMA args\\n\");\n \t\t\t\tus_vhost_usage(prgname);\n \t\t\t\treturn -1;\n \t\t\t}\n-\t\t\tasync_vhost_driver = 1;\n+\t\t\tif (ret & ASYNC_RX_VHOST)\n+\t\t\t\tasync_rx_vhost_driver = 1;\n+\t\t\tif (ret & ASYNC_TX_VHOST)\n+\t\t\t\tasync_tx_vhost_driver = 1;\n \t\t\tbreak;\n \n \t\tcase OPT_CLIENT_NUM:\n@@ -887,7 +892,7 @@ drain_vhost(struct vhost_dev *vdev)\n \n \tif (builtin_net_driver) {\n \t\tret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);\n-\t} else if 
(async_vhost_driver) {\n+\t} else if (async_rx_vhost_driver) {\n \t\tuint32_t cpu_cpl_nr = 0;\n \t\tuint16_t enqueue_fail = 0;\n \t\tstruct rte_mbuf *m_cpu_cpl[nr_xmit];\n@@ -914,7 +919,7 @@ drain_vhost(struct vhost_dev *vdev)\n \t\t\t\t__ATOMIC_SEQ_CST);\n \t}\n \n-\tif (!async_vhost_driver)\n+\tif (!async_rx_vhost_driver)\n \t\tfree_pkts(m, nr_xmit);\n }\n \n@@ -1217,7 +1222,7 @@ drain_eth_rx(struct vhost_dev *vdev)\n \tif (builtin_net_driver) {\n \t\tenqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,\n \t\t\t\t\t\tpkts, rx_count);\n-\t} else if (async_vhost_driver) {\n+\t} else if (async_rx_vhost_driver) {\n \t\tuint32_t cpu_cpl_nr = 0;\n \t\tuint16_t enqueue_fail = 0;\n \t\tstruct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];\n@@ -1245,7 +1250,7 @@ drain_eth_rx(struct vhost_dev *vdev)\n \t\t\t\t__ATOMIC_SEQ_CST);\n \t}\n \n-\tif (!async_vhost_driver)\n+\tif (!async_rx_vhost_driver)\n \t\tfree_pkts(pkts, rx_count);\n }\n \n@@ -1259,6 +1264,12 @@ drain_virtio_tx(struct vhost_dev *vdev)\n \tif (builtin_net_driver) {\n \t\tcount = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,\n \t\t\t\t\tpkts, MAX_PKT_BURST);\n+\t} else if (async_tx_vhost_driver) {\n+\t\tint nr_inflight;\n+\n+\t\tcount = rte_vhost_try_dequeue_burst(vdev->vid, VIRTIO_TXQ,\n+\t\t\t\tmbuf_pool, pkts, MAX_PKT_BURST, &nr_inflight);\n+\n \t} else {\n \t\tcount = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,\n \t\t\t\t\tmbuf_pool, pkts, MAX_PKT_BURST);\n@@ -1397,8 +1408,10 @@ destroy_device(int vid)\n \t\t\"(%d) device has been removed from data core\\n\",\n \t\tvdev->vid);\n \n-\tif (async_vhost_driver)\n+\tif (async_rx_vhost_driver)\n \t\trte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);\n+\tif (async_tx_vhost_driver)\n+\t\trte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);\n \n \trte_free(vdev);\n }\n@@ -1467,24 +1480,29 @@ new_device(int vid)\n \t\t\"(%d) device has been added to data core %d\\n\",\n \t\tvid, vdev->coreid);\n \n-\tif (async_vhost_driver) {\n-\t\tstruct rte_vhost_async_features f;\n-\t\tstruct rte_vhost_async_channel_ops channel_ops;\n+\tint ret = 0;\n+\tstruct rte_vhost_async_features f;\n+\tstruct rte_vhost_async_channel_ops channel_ops;\n \n-\t\tif (dma_type != NULL && strncmp(dma_type, \"ioat\", 4) == 0) {\n-\t\t\tchannel_ops.transfer_data = ioat_transfer_data_cb;\n-\t\t\tchannel_ops.check_completed_copies =\n-\t\t\t\tioat_check_completed_copies_cb;\n+\tif (dma_type != NULL && strncmp(dma_type, \"ioat\", 4) == 0) {\n+\t\tchannel_ops.transfer_data = ioat_transfer_data_cb;\n+\t\tchannel_ops.check_completed_copies =\n+\t\t\tioat_check_completed_copies_cb;\n \n-\t\t\tf.async_inorder = 1;\n-\t\t\tf.async_threshold = 256;\n+\t\tf.async_inorder = 1;\n+\t\tf.async_threshold = 0;\n \n-\t\t\treturn rte_vhost_async_channel_register(vid, VIRTIO_RXQ,\n-\t\t\t\tf.intval, &channel_ops);\n+\t\tif (async_rx_vhost_driver) {\n+\t\t\tret = rte_vhost_async_channel_register(\n+\t\t\t\tvid, VIRTIO_RXQ, f.intval, &channel_ops);\n+\t\t}\n+\t\tif (async_tx_vhost_driver && (ret == 0)) {\n+\t\t\tret = rte_vhost_async_channel_register(\n+\t\t\t\tvid, VIRTIO_TXQ, f.intval, &channel_ops);\n \t\t}\n \t}\n \n-\treturn 0;\n+\treturn ret;\n }\n \n /*\n@@ -1725,7 +1743,7 @@ main(int argc, char *argv[])\n \tfor (i = 0; i < nb_sockets; i++) {\n \t\tchar *file = socket_files + i * PATH_MAX;\n \n-\t\tif (async_vhost_driver)\n+\t\tif (async_rx_vhost_driver || async_tx_vhost_driver)\n \t\t\tflags = flags | RTE_VHOST_USER_ASYNC_COPY;\n \n \t\tret = rte_vhost_driver_register(file, flags);\ndiff --git a/lib/vhost/rte_vhost_async.h 
b/lib/vhost/rte_vhost_async.h\nindex 6faa31f5ad..0daf3e5576 100644\n--- a/lib/vhost/rte_vhost_async.h\n+++ b/lib/vhost/rte_vhost_async.h\n@@ -84,13 +84,21 @@ struct rte_vhost_async_channel_ops {\n };\n \n /**\n- * inflight async packet information\n+ * in-flight async packet information\n  */\n+struct async_nethdr {\n+\tstruct virtio_net_hdr hdr;\n+\tbool valid;\n+};\n+\n struct async_inflight_info {\n \tstruct rte_mbuf *mbuf;\n-\tuint16_t descs; /* num of descs inflight */\n+\tunion {\n+\t\tuint16_t descs; /* num of descs in-flight */\n+\t\tstruct async_nethdr nethdr;\n+\t};\n \tuint16_t nr_buffers; /* num of buffers inflight for packed ring */\n-};\n+} __rte_cache_aligned;\n \n /**\n  *  dma channel feature bit definition\n@@ -193,4 +201,34 @@ __rte_experimental\n uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \t\tstruct rte_mbuf **pkts, uint16_t count);\n \n+/**\n+ * This function tries to receive packets from the guest with offloading\n+ * large copies to the DMA engine. Successfully dequeued packets are\n+ * transfer completed, either by the CPU or the DMA engine, and they are\n+ * returned in \"pkts\". There may be other packets that are sent from\n+ * the guest but being transferred by the DMA engine, called in-flight\n+ * packets. The amount of in-flight packets by now is returned in\n+ * \"nr_inflight\". This function will return in-flight packets only after\n+ * the DMA engine finishes transferring.\n+ *\n+ * @param vid\n+ *  id of vhost device to dequeue data\n+ * @param queue_id\n+ *  queue id to dequeue data\n+ * @param pkts\n+ *  blank array to keep successfully dequeued packets\n+ * @param count\n+ *  size of the packet array\n+ * @param nr_inflight\n+ *  the amount of in-flight packets by now. If error occured, its\n+ *  value is set to -1.\n+ * @return\n+ *  num of successfully dequeued packets\n+ */\n+__rte_experimental\n+uint16_t\n+rte_vhost_try_dequeue_burst(int vid, uint16_t queue_id,\n+\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,\n+\tint *nr_inflight);\n+\n #endif /* _RTE_VHOST_ASYNC_H_ */\ndiff --git a/lib/vhost/version.map b/lib/vhost/version.map\nindex 9103a23cd4..2f82ab9713 100644\n--- a/lib/vhost/version.map\n+++ b/lib/vhost/version.map\n@@ -79,4 +79,7 @@ EXPERIMENTAL {\n \n \t# added in 21.05\n \trte_vhost_get_negotiated_protocol_features;\n+\n+\t# added in 21.08\n+\trte_vhost_try_dequeue_burst;\n };\ndiff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c\nindex 7ed86e4e43..b7994892ad 100644\n--- a/lib/vhost/virtio_net.c\n+++ b/lib/vhost/virtio_net.c\n@@ -3155,3 +3155,552 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,\n \n \treturn count;\n }\n+\n+static __rte_always_inline int\n+async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,\n+\t\t  struct buf_vector *buf_vec, uint16_t nr_vec,\n+\t\t  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,\n+\t\t  struct iovec *src_iovec, struct iovec *dst_iovec,\n+\t\t  struct rte_vhost_iov_iter *src_it,\n+\t\t  struct rte_vhost_iov_iter *dst_it,\n+\t\t  struct async_nethdr *nethdr,\n+\t\t  bool legacy_ol_flags)\n+{\n+\tuint64_t buf_addr;\n+\tuint32_t tlen = 0;\n+\tuint32_t buf_avail, buf_offset, buf_len;\n+\tuint32_t mbuf_avail, mbuf_offset;\n+\tuint32_t cpy_len, cpy_threshold;\n+\t/* A counter to avoid desc dead loop chain */\n+\tuint16_t vec_idx = 0;\n+\tint tvec_idx = 0;\n+\tstruct rte_mbuf *cur = m, *prev = m;\n+\tstruct virtio_net_hdr tmp_hdr;\n+\tstruct virtio_net_hdr *hdr = NULL;\n+\tstruct batch_copy_elem *batch_copy = 
vq->batch_copy_elems;\n+\n+\tbuf_addr = buf_vec[vec_idx].buf_addr;\n+\tbuf_len = buf_vec[vec_idx].buf_len;\n+\n+\tif (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))\n+\t\treturn -1;\n+\n+\tcpy_threshold = vq->async_threshold;\n+\n+\tif (virtio_net_with_host_offload(dev)) {\n+\t\tif (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {\n+\t\t\t/*\n+\t\t\t * No luck, the virtio-net header doesn't fit\n+\t\t\t * in a contiguous virtual area.\n+\t\t\t */\n+\t\t\tcopy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);\n+\t\t\thdr = &tmp_hdr;\n+\t\t} else {\n+\t\t\thdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);\n+\t\t}\n+\t}\n+\n+\t/*\n+\t * A virtio driver normally uses at least 2 desc buffers\n+\t * for Tx: the first for storing the header, and others\n+\t * for storing the data.\n+\t */\n+\tif (unlikely(buf_len < dev->vhost_hlen)) {\n+\t\tbuf_offset = dev->vhost_hlen - buf_len;\n+\t\tvec_idx++;\n+\t\tbuf_addr = buf_vec[vec_idx].buf_addr;\n+\t\tbuf_len = buf_vec[vec_idx].buf_len;\n+\t\tbuf_avail  = buf_len - buf_offset;\n+\t} else if (buf_len == dev->vhost_hlen) {\n+\t\tif (unlikely(++vec_idx >= nr_vec))\n+\t\t\treturn -1;\n+\t\tbuf_addr = buf_vec[vec_idx].buf_addr;\n+\t\tbuf_len = buf_vec[vec_idx].buf_len;\n+\n+\t\tbuf_offset = 0;\n+\t\tbuf_avail = buf_len;\n+\t} else {\n+\t\tbuf_offset = dev->vhost_hlen;\n+\t\tbuf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;\n+\t}\n+\n+\tPRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),\n+\t\t\t(uint32_t)buf_avail, 0);\n+\n+\tmbuf_offset = 0;\n+\tmbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;\n+\twhile (1) {\n+\t\tcpy_len = RTE_MIN(buf_avail, mbuf_avail);\n+\n+\t\tif (cpy_len >= cpy_threshold) {\n+\t\t\tasync_fill_vec(src_iovec + tvec_idx,\n+\t\t\t\t(void *)((uintptr_t)(buf_addr + buf_offset)),\n+\t\t\t\t(size_t)cpy_len);\n+\t\t\tasync_fill_vec(dst_iovec + tvec_idx,\n+\t\t\t\trte_pktmbuf_mtod_offset(cur,\n+\t\t\t\t\tvoid *, mbuf_offset),\n+\t\t\t\t(size_t)cpy_len);\n+\t\t\ttvec_idx++;\n+\t\t\ttlen += cpy_len;\n+\t\t} else if (likely(cpy_len > MAX_BATCH_LEN ||\n+\t\t\t\tvq->batch_copy_nb_elems >= vq->size ||\n+\t\t\t\t(hdr && cur == m))) {\n+\t\t\trte_memcpy(rte_pktmbuf_mtod_offset(cur,\n+\t\t\t\t\tvoid *, mbuf_offset),\n+\t\t\t\t(void *)((uintptr_t)(buf_addr + buf_offset)),\n+\t\t\t\tcpy_len);\n+\t\t} else {\n+\t\t\tbatch_copy[vq->batch_copy_nb_elems].dst =\n+\t\t\t\trte_pktmbuf_mtod_offset(cur,\n+\t\t\t\t\tvoid *, mbuf_offset);\n+\t\t\tbatch_copy[vq->batch_copy_nb_elems].src =\n+\t\t\t\t(void *)((uintptr_t)(buf_addr + buf_offset));\n+\t\t\tbatch_copy[vq->batch_copy_nb_elems].len = cpy_len;\n+\t\t\tvq->batch_copy_nb_elems++;\n+\t\t}\n+\n+\t\tmbuf_avail  -= cpy_len;\n+\t\tmbuf_offset += cpy_len;\n+\t\tbuf_avail  -= cpy_len;\n+\t\tbuf_offset += cpy_len;\n+\n+\t\t/* This buf reaches to its end, get the next one */\n+\t\tif (buf_avail == 0) {\n+\t\t\tif (++vec_idx >= nr_vec)\n+\t\t\t\tbreak;\n+\n+\t\t\tbuf_addr = buf_vec[vec_idx].buf_addr;\n+\t\t\tbuf_len = buf_vec[vec_idx].buf_len;\n+\n+\t\t\tbuf_offset = 0;\n+\t\t\tbuf_avail = buf_len;\n+\n+\t\t\tPRINT_PACKET(dev, (uintptr_t)buf_addr,\n+\t\t\t\t\t(uint32_t)buf_avail, 0);\n+\t\t}\n+\n+\t\t/*\n+\t\t * This mbuf reaches to its end, get a new one\n+\t\t * to hold more data.\n+\t\t */\n+\t\tif (mbuf_avail == 0) {\n+\t\t\tcur = rte_pktmbuf_alloc(mbuf_pool);\n+\t\t\tif (unlikely(cur == NULL)) {\n+\t\t\t\tVHOST_LOG_DATA(ERR, \"Failed to \"\n+\t\t\t\t\t\"allocate memory for mbuf.\\n\");\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\n+\t\t\tprev->next = cur;\n+\t\t\tprev->data_len = mbuf_offset;\n+\t\t\tm->nb_segs += 
1;\n+\t\t\tm->pkt_len += mbuf_offset;\n+\t\t\tprev = cur;\n+\n+\t\t\tmbuf_offset = 0;\n+\t\t\tmbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;\n+\t\t}\n+\t}\n+\n+\tprev->data_len = mbuf_offset;\n+\tm->pkt_len += mbuf_offset;\n+\n+\tif (hdr && tlen) {\n+\t\tnethdr->valid = true;\n+\t\tnethdr->hdr = *hdr;\n+\t} else if (hdr)\n+\t\tvhost_dequeue_offload(hdr, m, legacy_ol_flags);\n+\n+\tif (tlen) {\n+\t\tasync_fill_iter(src_it, tlen, src_iovec, tvec_idx);\n+\t\tasync_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);\n+\t} else\n+\t\tsrc_it->count = 0;\n+\n+\treturn 0;\n+}\n+\n+static __rte_always_inline uint16_t\n+async_poll_dequeue_completed_split(struct virtio_net *dev,\n+\t\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\t\tstruct rte_mbuf **pkts, uint16_t count, bool legacy_ol_flags)\n+{\n+\tuint16_t n_pkts_cpl = 0, n_pkts_put = 0;\n+\tuint16_t start_idx, pkt_idx, from;\n+\tstruct async_inflight_info *pkts_info;\n+\n+\tpkt_idx = vq->async_pkts_idx & (vq->size - 1);\n+\tpkts_info = vq->async_pkts_info;\n+\tstart_idx = virtio_dev_rx_async_get_info_idx(pkt_idx, vq->size,\n+\t\t\tvq->async_pkts_inflight_n);\n+\n+\tif (count > vq->async_last_pkts_n) {\n+\t\tn_pkts_cpl = vq->async_ops.check_completed_copies(dev->vid,\n+\t\t\tqueue_id, 0, count - vq->async_last_pkts_n);\n+\t}\n+\n+\tn_pkts_cpl += vq->async_last_pkts_n;\n+\tif (unlikely(n_pkts_cpl == 0))\n+\t\treturn 0;\n+\n+\tn_pkts_put = RTE_MIN(count, n_pkts_cpl);\n+\n+\tfor (pkt_idx = 0; pkt_idx < n_pkts_put; pkt_idx++) {\n+\t\tfrom = (start_idx + pkt_idx) & (vq->size - 1);\n+\t\tpkts[pkt_idx] = pkts_info[from].mbuf;\n+\n+\t\tif (pkts_info[from].nethdr.valid) {\n+\t\t\tvhost_dequeue_offload(&pkts_info[from].nethdr.hdr,\n+\t\t\t\t\tpkts[pkt_idx], legacy_ol_flags);\n+\t\t}\n+\n+\t\tfrom = vq->last_async_desc_idx_split & (vq->size - 1);\n+\t\tupdate_shadow_used_ring_split(vq,\n+\t\t\t\tvq->async_descs_split[from].id, 0);\n+\t\tvq->last_async_desc_idx_split++;\n+\t}\n+\tvq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;\n+\n+\tif (n_pkts_put)\n+\t\tvq->async_pkts_inflight_n -= n_pkts_put;\n+\n+\treturn n_pkts_put;\n+}\n+\n+static __rte_always_inline uint16_t\n+virtio_dev_tx_async_split(struct virtio_net *dev,\n+\t\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\t\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,\n+\t\tuint16_t count, bool legacy_ol_flags)\n+{\n+\tstatic bool allocerr_warned;\n+\tuint16_t pkt_idx;\n+\tuint16_t free_entries;\n+\tuint16_t slot_idx = 0;\n+\tuint16_t segs_await = 0;\n+\tuint16_t nr_done_pkts = 0, nr_async_pkts = 0;\n+\tuint16_t nr_async_burst = 0;\n+\tuint16_t pkt_err = 0;\n+\tuint16_t iovec_idx = 0, it_idx = 0;\n+\n+\tstruct rte_vhost_iov_iter *it_pool = vq->it_pool;\n+\tstruct iovec *vec_pool = vq->vec_pool;\n+\tstruct iovec *src_iovec = vec_pool;\n+\tstruct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\tstruct rte_vhost_async_desc tdes[MAX_PKT_BURST];\n+\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n+\n+\tstruct async_pkt_index {\n+\t\tuint16_t last_avail_idx;\n+\t} async_pkts_log[MAX_PKT_BURST];\n+\n+\tnr_done_pkts = async_poll_dequeue_completed_split(dev, vq, queue_id,\n+\t\t\t\t\t\tpkts, count, legacy_ol_flags);\n+\tif (unlikely(nr_done_pkts == count))\n+\t\tgoto out;\n+\n+\t/**\n+\t * The ordering between avail index and\n+\t * desc reads needs to be enforced.\n+\t */\n+\tfree_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -\n+\t\t\tvq->last_avail_idx;\n+\tif (free_entries == 0)\n+\t\tgoto out;\n+\n+\trte_prefetch0(&vq->avail->ring[vq->last_avail_idx & 
(vq->size - 1)]);\n+\n+\tcount = RTE_MIN(count - nr_done_pkts, MAX_PKT_BURST);\n+\tcount = RTE_MIN(count, free_entries);\n+\tVHOST_LOG_DATA(DEBUG, \"(%d) about to dequeue %u buffers\\n\",\n+\t\t\tdev->vid, count);\n+\n+\tfor (pkt_idx = 0; pkt_idx < count; pkt_idx++) {\n+\t\tuint16_t head_idx = 0;\n+\t\tuint16_t nr_vec = 0;\n+\t\tuint32_t buf_len;\n+\t\tint err;\n+\t\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n+\t\tstruct rte_mbuf *pkt;\n+\n+\t\tif (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx,\n+\t\t\t\t\t\t&nr_vec, buf_vec,\n+\t\t\t\t\t\t&head_idx, &buf_len,\n+\t\t\t\t\t\tVHOST_ACCESS_RO) < 0))\n+\t\t\tbreak;\n+\n+\t\tpkt = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len);\n+\t\tif (unlikely(pkt == NULL)) {\n+\t\t\t/**\n+\t\t\t * mbuf allocation fails for jumbo packets when external\n+\t\t\t * buffer allocation is not allowed and linear buffer\n+\t\t\t * is required. Drop this packet.\n+\t\t\t */\n+\t\t\tif (!allocerr_warned) {\n+\t\t\t\tVHOST_LOG_DATA(ERR,\n+\t\t\t\t\t\"Failed mbuf alloc of size %d from %s on %s.\\n\",\n+\t\t\t\t\tbuf_len, mbuf_pool->name, dev->ifname);\n+\t\t\t\tallocerr_warned = true;\n+\t\t\t}\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tslot_idx = (vq->async_pkts_idx + nr_async_pkts) &\n+\t\t\t\t(vq->size - 1);\n+\t\terr = async_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt,\n+\t\t\t\tmbuf_pool, &src_iovec[iovec_idx],\n+\t\t\t\t&dst_iovec[iovec_idx], &it_pool[it_idx],\n+\t\t\t\t&it_pool[it_idx + 1],\n+\t\t\t\t&pkts_info[slot_idx].nethdr, legacy_ol_flags);\n+\t\tif (unlikely(err)) {\n+\t\t\trte_pktmbuf_free(pkt);\n+\t\t\tif (!allocerr_warned) {\n+\t\t\t\tVHOST_LOG_DATA(ERR,\n+\t\t\t\t\t\"Failed to copy desc to mbuf on %s.\\n\",\n+\t\t\t\t\tdev->ifname);\n+\t\t\t\tallocerr_warned = true;\n+\t\t\t}\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tif (it_pool[it_idx].count) {\n+\t\t\tuint16_t to = vq->async_desc_idx_split & (vq->size - 1);\n+\n+\t\t\tasync_fill_desc(&tdes[nr_async_burst], &it_pool[it_idx],\n+\t\t\t\t&it_pool[it_idx + 1]);\n+\t\t\tpkts_info[slot_idx].mbuf = pkt;\n+\t\t\tasync_pkts_log[nr_async_pkts++].last_avail_idx =\n+\t\t\t\tvq->last_avail_idx;\n+\t\t\tnr_async_burst++;\n+\t\t\tiovec_idx += it_pool[it_idx].nr_segs;\n+\t\t\tit_idx += 2;\n+\t\t\tsegs_await += it_pool[it_idx].nr_segs;\n+\n+\t\t\t/* keep used desc */\n+\t\t\tvq->async_descs_split[to].id = head_idx;\n+\t\t\tvq->async_descs_split[to].len = 0;\n+\t\t\tvq->async_desc_idx_split++;\n+\t\t} else {\n+\t\t\tupdate_shadow_used_ring_split(vq, head_idx, 0);\n+\t\t\tpkts[nr_done_pkts++] = pkt;\n+\t\t}\n+\n+\t\tvq->last_avail_idx++;\n+\n+\t\tif (unlikely((nr_async_burst >= VHOST_ASYNC_BATCH_THRESHOLD) ||\n+\t\t\t\t\t((VHOST_MAX_ASYNC_VEC >> 1) -\n+\t\t\t\t\t segs_await < BUF_VECTOR_MAX))) {\n+\t\t\tuint16_t nr_pkts;\n+\n+\t\t\tnr_pkts = vq->async_ops.transfer_data(dev->vid,\n+\t\t\t\t\tqueue_id, tdes, 0, nr_async_burst);\n+\t\t\tsrc_iovec = vec_pool;\n+\t\t\tdst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\t\t\tit_idx = 0;\n+\t\t\tsegs_await = 0;\n+\t\t\tvq->async_pkts_inflight_n += nr_pkts;\n+\n+\t\t\tif (unlikely(nr_pkts < nr_async_burst)) {\n+\t\t\t\tpkt_err = nr_async_burst - nr_pkts;\n+\t\t\t\tnr_async_burst = 0;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tnr_async_burst = 0;\n+\t\t}\n+\t}\n+\n+\tif (nr_async_burst) {\n+\t\tuint32_t nr_pkts;\n+\n+\t\tnr_pkts = vq->async_ops.transfer_data(dev->vid, queue_id,\n+\t\t\t\ttdes, 0, nr_async_burst);\n+\t\tvq->async_pkts_inflight_n += nr_pkts;\n+\n+\t\tif (unlikely(nr_pkts < nr_async_burst))\n+\t\t\tpkt_err = nr_async_burst - 
nr_pkts;\n+\t}\n+\n+\tdo_data_copy_dequeue(vq);\n+\n+\tif (unlikely(pkt_err)) {\n+\t\tuint16_t nr_err_dma = pkt_err;\n+\t\tuint16_t nr_err_sw;\n+\n+\t\tnr_async_pkts -= nr_err_dma;\n+\n+\t\t/**\n+\t\t * revert shadow used ring and free pktmbufs for\n+\t\t * CPU-copied pkts after the first DMA-error pkt.\n+\t\t */\n+\t\tnr_err_sw = vq->last_avail_idx -\n+\t\t\tasync_pkts_log[nr_async_pkts].last_avail_idx -\n+\t\t\tnr_err_dma;\n+\t\tvq->shadow_used_idx -= nr_err_sw;\n+\t\twhile (nr_err_sw-- > 0)\n+\t\t\trte_pktmbuf_free(pkts[--nr_done_pkts]);\n+\n+\t\t/**\n+\t\t * recover DMA-copy related structures and free pktmbufs\n+\t\t * for DMA-error pkts.\n+\t\t */\n+\t\tvq->async_desc_idx_split -= nr_err_dma;\n+\t\twhile (nr_err_dma-- > 0) {\n+\t\t\trte_pktmbuf_free(\n+\t\t\t\tpkts_info[slot_idx & (vq->size - 1)].mbuf);\n+\t\t\tslot_idx--;\n+\t\t}\n+\n+\t\t/* recover available ring */\n+\t\tvq->last_avail_idx =\n+\t\t\tasync_pkts_log[nr_async_pkts].last_avail_idx;\n+\t}\n+\n+\tvq->async_pkts_idx += nr_async_pkts;\n+\n+out:\n+\tif (likely(vq->shadow_used_idx)) {\n+\t\tflush_shadow_used_ring_split(dev, vq);\n+\t\tvhost_vring_call_split(dev, vq);\n+\t}\n+\n+\treturn nr_done_pkts;\n+}\n+\n+__rte_noinline\n+static uint16_t\n+virtio_dev_tx_async_split_legacy(struct virtio_net *dev,\n+\t\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\t\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,\n+\t\tuint16_t count)\n+{\n+\treturn virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,\n+\t\t\t\tpkts, count, true);\n+}\n+\n+__rte_noinline\n+static uint16_t\n+virtio_dev_tx_async_split_compliant(struct virtio_net *dev,\n+\t\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\t\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,\n+\t\tuint16_t count)\n+{\n+\treturn virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,\n+\t\t\t\tpkts, count, false);\n+}\n+\n+uint16_t\n+rte_vhost_try_dequeue_burst(int vid, uint16_t queue_id,\n+\tstruct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,\n+\tint *nr_inflight)\n+{\n+\tstruct virtio_net *dev;\n+\tstruct rte_mbuf *rarp_mbuf = NULL;\n+\tstruct vhost_virtqueue *vq;\n+\tint16_t success = 1;\n+\n+\t*nr_inflight = -1;\n+\n+\tdev = get_device(vid);\n+\tif (!dev)\n+\t\treturn 0;\n+\n+\tif (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {\n+\t\tVHOST_LOG_DATA(ERR,\n+\t\t\t\"(%d) %s: built-in vhost net backend is disabled.\\n\",\n+\t\t\tdev->vid, __func__);\n+\t\treturn 0;\n+\t}\n+\n+\tif (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {\n+\t\tVHOST_LOG_DATA(ERR,\n+\t\t\t\"(%d) %s: invalid virtqueue idx %d.\\n\",\n+\t\t\tdev->vid, __func__, queue_id);\n+\t\treturn 0;\n+\t}\n+\n+\tvq = dev->virtqueue[queue_id];\n+\n+\tif (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))\n+\t\treturn 0;\n+\n+\tif (unlikely(vq->enabled == 0)) {\n+\t\tcount = 0;\n+\t\tgoto out_access_unlock;\n+\t}\n+\n+\tif (unlikely(!vq->async_registered)) {\n+\t\tVHOST_LOG_DATA(ERR, \"(%d) %s: async not registered for queue id %d.\\n\",\n+\t\t\tdev->vid, __func__, queue_id);\n+\t\tcount = 0;\n+\t\tgoto out_access_unlock;\n+\t}\n+\n+\tif (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))\n+\t\tvhost_user_iotlb_rd_lock(vq);\n+\n+\tif (unlikely(vq->access_ok == 0))\n+\t\tif (unlikely(vring_translate(dev, vq) < 0)) {\n+\t\t\tcount = 0;\n+\t\t\tgoto out_access_unlock;\n+\t\t}\n+\n+\t/*\n+\t * Construct a RARP broadcast packet, and inject it to the \"pkts\"\n+\t * array, to looks like that guest actually send such packet.\n+\t *\n+\t * Check user_send_rarp() for 
more information.\n+\t *\n+\t * broadcast_rarp shares a cacheline in the virtio_net structure\n+\t * with some fields that are accessed during enqueue and\n+\t * __atomic_compare_exchange_n causes a write if performed compare\n+\t * and exchange. This could result in false sharing between enqueue\n+\t * and dequeue.\n+\t *\n+\t * Prevent unnecessary false sharing by reading broadcast_rarp first\n+\t * and only performing compare and exchange if the read indicates it\n+\t * is likely to be set.\n+\t */\n+\tif (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&\n+\t\t\t__atomic_compare_exchange_n(&dev->broadcast_rarp,\n+\t\t\t&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {\n+\n+\t\trarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);\n+\t\tif (rarp_mbuf == NULL) {\n+\t\t\tVHOST_LOG_DATA(ERR, \"Failed to make RARP packet.\\n\");\n+\t\t\tcount = 0;\n+\t\t\tgoto out;\n+\t\t}\n+\t\tcount -= 1;\n+\t}\n+\n+\tif (unlikely(vq_is_packed(dev)))\n+\t\treturn 0;\n+\n+\tif (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)\n+\t\tcount = virtio_dev_tx_async_split_legacy(dev, vq, queue_id,\n+\t\t\t\tmbuf_pool, pkts, count);\n+\telse\n+\t\tcount = virtio_dev_tx_async_split_compliant(dev, vq, queue_id,\n+\t\t\t\tmbuf_pool, pkts, count);\n+\n+out:\n+\t*nr_inflight = vq->async_pkts_inflight_n;\n+\n+\tif (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))\n+\t\tvhost_user_iotlb_rd_unlock(vq);\n+\n+out_access_unlock:\n+\trte_spinlock_unlock(&vq->access_lock);\n+\n+\tif (unlikely(rarp_mbuf != NULL)) {\n+\t\t/*\n+\t\t * Inject it to the head of \"pkts\" array, so that switch's mac\n+\t\t * learning table will get updated first.\n+\t\t */\n+\t\tmemmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));\n+\t\tpkts[0] = rarp_mbuf;\n+\t\tcount += 1;\n+\t}\n+\n+\treturn count;\n+}\n",
    "prefixes": [
        "1/1"
    ]
}
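
The commit message and the vhost_lib.rst hunk in the record above describe the new rte_vhost_try_dequeue_burst() call. Below is a minimal, illustrative C sketch of how an application might drain a vhost TX queue with it, modeled on the examples/vhost/main.c change in the diff; the prototype comes from the lib/vhost/rte_vhost_async.h hunk, while the burst size and the simple free loop are assumptions, not part of the patch. Note the record marks this patch as superseded, so later revisions of the series may differ.

/*
 * Illustrative sketch, not part of the patch: drain one vhost TX queue
 * using the experimental async dequeue API added by this patch. The
 * prototype is taken from the lib/vhost/rte_vhost_async.h hunk; the
 * MAX_PKT_BURST value and the free loop are assumptions standing in for
 * real packet processing.
 */
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_vhost.h>        /* VIRTIO_TXQ */
#include <rte_vhost_async.h>  /* rte_vhost_try_dequeue_burst() */

#define MAX_PKT_BURST 32

static uint16_t
drain_async_virtio_tx(int vid, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	int nr_inflight;
	uint16_t count, i;

	/*
	 * Completed packets are returned in pkts[]. Packets whose copies are
	 * still in flight on the DMA engine are only reported via
	 * nr_inflight and are returned by a later call, once the DMA engine
	 * finishes transferring them.
	 */
	count = rte_vhost_try_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
			pkts, MAX_PKT_BURST, &nr_inflight);

	for (i = 0; i < count; i++) {
		/* Forward or otherwise consume pkts[i] here ... */
		rte_pktmbuf_free(pkts[i]);
	}

	return count;
}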