get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/89358/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 89358,
    "url": "http://patchwork.dpdk.org/api/patches/89358/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20210317085426.10119-1-Cheng1.jiang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210317085426.10119-1-Cheng1.jiang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210317085426.10119-1-Cheng1.jiang@intel.com",
    "date": "2021-03-17T08:54:26",
    "name": "vhost: add support for packed ring in async vhost",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "e0c15ef34b3b88c8096b9d781cce87b0b0ec805b",
    "submitter": {
        "id": 1530,
        "url": "http://patchwork.dpdk.org/api/people/1530/?format=api",
        "name": "Jiang, Cheng1",
        "email": "Cheng1.jiang@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patchwork.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20210317085426.10119-1-Cheng1.jiang@intel.com/mbox/",
    "series": [
        {
            "id": 15731,
            "url": "http://patchwork.dpdk.org/api/series/15731/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=15731",
            "date": "2021-03-17T08:54:26",
            "name": "vhost: add support for packed ring in async vhost",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/15731/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/89358/comments/",
    "check": "success",
    "checks": "http://patchwork.dpdk.org/api/patches/89358/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 4EFF4A00C2;\n\tWed, 17 Mar 2021 10:11:30 +0100 (CET)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id CDC7E16084C;\n\tWed, 17 Mar 2021 10:11:29 +0100 (CET)",
            "from mga01.intel.com (mga01.intel.com [192.55.52.88])\n by mails.dpdk.org (Postfix) with ESMTP id 3C2AF40687\n for <dev@dpdk.org>; Wed, 17 Mar 2021 10:11:27 +0100 (CET)",
            "from orsmga007.jf.intel.com ([10.7.209.58])\n by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 17 Mar 2021 02:11:26 -0700",
            "from dpdk_jiangcheng.sh.intel.com ([10.67.119.112])\n by orsmga007.jf.intel.com with ESMTP; 17 Mar 2021 02:11:23 -0700"
        ],
        "IronPort-SDR": [
            "\n yhAovoASci6z3BJOvGSkjgKua0BzltptbZ8iDkSXBHHkE88dT/DwZngvlXvpblhkGwsx0grOGb\n x1zwVCOTo/aw==",
            "\n BoF/h0R3dCOtV1H8pHpxeD1YYfJ7P0WbLKVEbZfF5IE728mHPe81U+Elwwwy0jdj3HY7YBRP0D\n soO6zuahDjww=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6000,8403,9925\"; a=\"209387154\"",
            "E=Sophos;i=\"5.81,255,1610438400\"; d=\"scan'208\";a=\"209387154\"",
            "E=Sophos;i=\"5.81,255,1610438400\"; d=\"scan'208\";a=\"411397523\""
        ],
        "X-ExtLoop1": "1",
        "From": "Cheng Jiang <Cheng1.jiang@intel.com>",
        "To": "maxime.coquelin@redhat.com,\n\tchenbo.xia@intel.com",
        "Cc": "dev@dpdk.org, jiayu.hu@intel.com, yvonnex.yang@intel.com,\n yinan.wang@intel.com, Cheng Jiang <Cheng1.jiang@intel.com>",
        "Date": "Wed, 17 Mar 2021 08:54:26 +0000",
        "Message-Id": "<20210317085426.10119-1-Cheng1.jiang@intel.com>",
        "X-Mailer": "git-send-email 2.29.2",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH] vhost: add support for packed ring in async vhost",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "For now async vhost data path only supports split ring structure. In\norder to make async vhost compatible with virtio 1.1 spec this patch\nenables packed ring in async vhost data path.\n\nSigned-off-by: Cheng Jiang <Cheng1.jiang@intel.com>\n---\n lib/librte_vhost/rte_vhost_async.h |   1 +\n lib/librte_vhost/vhost.c           |  15 +-\n lib/librte_vhost/vhost.h           |   7 +-\n lib/librte_vhost/virtio_net.c      | 449 +++++++++++++++++++++++++++--\n 4 files changed, 436 insertions(+), 36 deletions(-)",
    "diff": "diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h\nindex c855ff875..29de5df8c 100644\n--- a/lib/librte_vhost/rte_vhost_async.h\n+++ b/lib/librte_vhost/rte_vhost_async.h\n@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {\n struct async_inflight_info {\n \tstruct rte_mbuf *mbuf;\n \tuint16_t descs; /* num of descs inflight */\n+\tuint16_t nr_buffers; /* num of buffers inflight for packed ring*/\n };\n \n /**\ndiff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c\nindex 52ab93d1e..445a9f327 100644\n--- a/lib/librte_vhost/vhost.c\n+++ b/lib/librte_vhost/vhost.c\n@@ -1603,9 +1603,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,\n \t\treturn -1;\n \n \t/* packed queue is not supported */\n-\tif (unlikely(vq_is_packed(dev) || !f.async_inorder)) {\n+\tif (unlikely(!f.async_inorder)) {\n \t\tVHOST_LOG_CONFIG(ERR,\n-\t\t\t\"async copy is not supported on packed queue or non-inorder mode \"\n+\t\t\t\"async copy is not supported on non-inorder mode \"\n \t\t\t\"(vid %d, qid: %d)\\n\", vid, queue_id);\n \t\treturn -1;\n \t}\n@@ -1643,10 +1643,17 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,\n \tvq->vec_pool = rte_malloc_socket(NULL,\n \t\t\tVHOST_MAX_ASYNC_VEC * sizeof(struct iovec),\n \t\t\tRTE_CACHE_LINE_SIZE, node);\n-\tvq->async_descs_split = rte_malloc_socket(NULL,\n+\tif (vq_is_packed(dev)) {\n+\t\tvq->async_buffers_packed = rte_malloc_socket(NULL,\n+\t\t\tvq->size * sizeof(struct vring_used_elem_packed),\n+\t\t\tRTE_CACHE_LINE_SIZE, node);\n+\t} else {\n+\t\tvq->async_descs_split = rte_malloc_socket(NULL,\n \t\t\tvq->size * sizeof(struct vring_used_elem),\n \t\t\tRTE_CACHE_LINE_SIZE, node);\n-\tif (!vq->async_descs_split || !vq->async_pkts_info ||\n+\t}\n+\n+\tif (!vq->async_pkts_info ||\n \t\t!vq->it_pool || !vq->vec_pool) {\n \t\tvhost_free_async_mem(vq);\n \t\tVHOST_LOG_CONFIG(ERR,\ndiff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h\nindex 658f6fc28..d6324fbf8 100644\n--- a/lib/librte_vhost/vhost.h\n+++ b/lib/librte_vhost/vhost.h\n@@ -206,9 +206,14 @@ struct vhost_virtqueue {\n \tuint16_t\tasync_pkts_idx;\n \tuint16_t\tasync_pkts_inflight_n;\n \tuint16_t\tasync_last_pkts_n;\n-\tstruct vring_used_elem  *async_descs_split;\n+\tunion {\n+\t\tstruct vring_used_elem  *async_descs_split;\n+\t\tstruct vring_used_elem_packed *async_buffers_packed;\n+\t};\n \tuint16_t async_desc_idx;\n+\tuint16_t async_packed_buffer_idx;\n \tuint16_t last_async_desc_idx;\n+\tuint16_t last_async_buffer_idx;\n \n \t/* vq async features */\n \tbool\t\tasync_inorder;\ndiff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c\nindex 583bf379c..9e798226b 100644\n--- a/lib/librte_vhost/virtio_net.c\n+++ b/lib/librte_vhost/virtio_net.c\n@@ -363,8 +363,7 @@ vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,\n }\n \n static __rte_always_inline void\n-vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n-\t\t\t\t   struct vhost_virtqueue *vq,\n+vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,\n \t\t\t\t   uint32_t len[],\n \t\t\t\t   uint16_t id[],\n \t\t\t\t   uint16_t count[],\n@@ -382,6 +381,17 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n \t\tvq->shadow_aligned_idx += count[i];\n \t\tvq->shadow_used_idx++;\n \t}\n+}\n+\n+static __rte_always_inline void\n+vhost_shadow_enqueue_single_packed(struct virtio_net *dev,\n+\t\t\t\t   struct vhost_virtqueue *vq,\n+\t\t\t\t   uint32_t len[],\n+\t\t\t\t   uint16_t id[],\n+\t\t\t\t   uint16_t count[],\n+\t\t\t\t   uint16_t num_buffers)\n+{\n+\tvhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);\n \n \tif (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {\n \t\tdo_data_copy_enqueue(dev, vq);\n@@ -1633,12 +1643,343 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,\n \treturn pkt_idx;\n }\n \n+static __rte_always_inline int\n+vhost_enqueue_async_single_packed(struct virtio_net *dev,\n+\t\t\t    struct vhost_virtqueue *vq,\n+\t\t\t    struct rte_mbuf *pkt,\n+\t\t\t    struct buf_vector *buf_vec,\n+\t\t\t    uint16_t *nr_descs,\n+\t\t\t    uint16_t *nr_buffers,\n+\t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n+\t\t\t    struct rte_vhost_iov_iter *src_it,\n+\t\t\t    struct rte_vhost_iov_iter *dst_it)\n+{\n+\tuint16_t nr_vec = 0;\n+\tuint16_t avail_idx = vq->last_avail_idx;\n+\tuint16_t max_tries, tries = 0;\n+\tuint16_t buf_id = 0;\n+\tuint32_t len = 0;\n+\tuint16_t desc_count;\n+\tuint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);\n+\tuint32_t buffer_len[vq->size];\n+\tuint16_t buffer_buf_id[vq->size];\n+\tuint16_t buffer_desc_count[vq->size];\n+\t*nr_buffers = 0;\n+\n+\tif (rxvq_is_mergeable(dev))\n+\t\tmax_tries = vq->size - 1;\n+\telse\n+\t\tmax_tries = 1;\n+\n+\twhile (size > 0) {\n+\t\t/*\n+\t\t * if we tried all available ring items, and still\n+\t\t * can't get enough buf, it means something abnormal\n+\t\t * happened.\n+\t\t */\n+\t\tif (unlikely(++tries > max_tries))\n+\t\t\treturn -1;\n+\n+\t\tif (unlikely(fill_vec_buf_packed(dev, vq,\n+\t\t\t\t\t\tavail_idx, &desc_count,\n+\t\t\t\t\t\tbuf_vec, &nr_vec,\n+\t\t\t\t\t\t&buf_id, &len,\n+\t\t\t\t\t\tVHOST_ACCESS_RW) < 0))\n+\t\t\treturn -1;\n+\n+\t\tlen = RTE_MIN(len, size);\n+\t\tsize -= len;\n+\n+\t\tbuffer_len[*nr_buffers] = len;\n+\t\tbuffer_buf_id[*nr_buffers] = buf_id;\n+\t\tbuffer_desc_count[*nr_buffers] = desc_count;\n+\t\t*nr_buffers += 1;\n+\n+\t\t*nr_descs += desc_count;\n+\t\tavail_idx += desc_count;\n+\t\tif (avail_idx >= vq->size)\n+\t\t\tavail_idx -= vq->size;\n+\t}\n+\n+\tif (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers,\n+\t\tsrc_iovec, dst_iovec, src_it, dst_it) < 0)\n+\t\treturn -1;\n+\n+\tvhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id,\n+\t\t\t\t\t   buffer_desc_count, *nr_buffers);\n+\n+\treturn 0;\n+}\n+\n+static __rte_always_inline int16_t\n+virtio_dev_rx_async_single_packed(struct virtio_net *dev,\n+\t\t\t    struct vhost_virtqueue *vq,\n+\t\t\t    struct rte_mbuf *pkt,\n+\t\t\t    uint16_t *nr_descs, uint16_t *nr_buffers,\n+\t\t\t    struct iovec *src_iovec, struct iovec *dst_iovec,\n+\t\t\t    struct rte_vhost_iov_iter *src_it,\n+\t\t\t    struct rte_vhost_iov_iter *dst_it)\n+{\n+\tstruct buf_vector buf_vec[BUF_VECTOR_MAX];\n+\t*nr_descs = 0;\n+\t*nr_buffers = 0;\n+\n+\tif (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec,\n+\t\t\t\t\t\t nr_descs,\n+\t\t\t\t\t\t nr_buffers,\n+\t\t\t\t\t\t src_iovec, dst_iovec,\n+\t\t\t\t\t\t src_it, dst_it) < 0)) {\n+\t\tVHOST_LOG_DATA(DEBUG,\n+\t\t\t\t\"(%d) failed to get enough desc from vring\\n\",\n+\t\t\t\tdev->vid);\n+\t\treturn -1;\n+\t}\n+\n+\tVHOST_LOG_DATA(DEBUG, \"(%d) current index %d | end index %d\\n\",\n+\t\t\tdev->vid, vq->last_avail_idx,\n+\t\t\tvq->last_avail_idx + *nr_descs);\n+\n+\treturn 0;\n+}\n+\n+static __rte_noinline uint32_t\n+virtio_dev_rx_async_submit_packed(struct virtio_net *dev,\n+\tstruct vhost_virtqueue *vq, uint16_t queue_id,\n+\tstruct rte_mbuf **pkts, uint32_t count,\n+\tstruct rte_mbuf **comp_pkts, uint32_t *comp_count)\n+{\n+\tuint32_t pkt_idx = 0, pkt_burst_idx = 0;\n+\tuint16_t num_buffers;\n+\tuint16_t num_desc;\n+\n+\tstruct rte_vhost_iov_iter *it_pool = vq->it_pool;\n+\tstruct iovec *vec_pool = vq->vec_pool;\n+\tstruct rte_vhost_async_desc tdes[MAX_PKT_BURST];\n+\tstruct iovec *src_iovec = vec_pool;\n+\tstruct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\tstruct rte_vhost_iov_iter *src_it = it_pool;\n+\tstruct rte_vhost_iov_iter *dst_it = it_pool + 1;\n+\tuint16_t slot_idx = 0;\n+\tuint16_t segs_await = 0;\n+\tstruct async_inflight_info *pkts_info = vq->async_pkts_info;\n+\tuint32_t n_pkts = 0, pkt_err = 0;\n+\tuint32_t num_async_pkts = 0, num_done_pkts = 0;\n+\tstruct {\n+\t\tuint16_t pkt_idx;\n+\t\tuint16_t last_avail_idx;\n+\t} async_pkts_log[MAX_PKT_BURST];\n+\n+\trte_prefetch0(&vq->desc[vq->last_avail_idx & (vq->size - 1)]);\n+\n+\tfor (pkt_idx = 0; pkt_idx < count; pkt_idx++) {\n+\t\tif (unlikely(virtio_dev_rx_async_single_packed(dev, vq,\n+\t\t\t\t\t\tpkts[pkt_idx],\n+\t\t\t\t\t\t&num_desc, &num_buffers,\n+\t\t\t\t\t\tsrc_iovec, dst_iovec,\n+\t\t\t\t\t\tsrc_it, dst_it) < 0)) {\n+\t\t\tbreak;\n+\t\t}\n+\n+\t\tVHOST_LOG_DATA(DEBUG, \"(%d) current index %d | end index %d\\n\",\n+\t\t\tdev->vid, vq->last_avail_idx,\n+\t\t\tvq->last_avail_idx + num_desc);\n+\n+\t\tslot_idx = (vq->async_pkts_idx + num_async_pkts) &\n+\t\t\t(vq->size - 1);\n+\t\tif (src_it->count) {\n+\t\t\tuint16_t from, to;\n+\n+\t\t\tasync_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);\n+\t\t\tpkts_info[slot_idx].descs = num_desc;\n+\t\t\tpkts_info[slot_idx].nr_buffers = num_buffers;\n+\t\t\tpkts_info[slot_idx].mbuf = pkts[pkt_idx];\n+\t\t\tasync_pkts_log[num_async_pkts].pkt_idx = pkt_idx;\n+\t\t\tasync_pkts_log[num_async_pkts++].last_avail_idx =\n+\t\t\t\tvq->last_avail_idx;\n+\t\t\tsrc_iovec += src_it->nr_segs;\n+\t\t\tdst_iovec += dst_it->nr_segs;\n+\t\t\tsrc_it += 2;\n+\t\t\tdst_it += 2;\n+\t\t\tsegs_await += src_it->nr_segs;\n+\n+\t\t\t/**\n+\t\t\t * recover shadow used ring and keep DMA-occupied\n+\t\t\t * descriptors.\n+\t\t\t */\n+\t\t\tfrom = vq->shadow_used_idx - num_buffers;\n+\t\t\tto = vq->async_packed_buffer_idx & (vq->size - 1);\n+\t\t\tif (num_buffers + to <= vq->size) {\n+\t\t\t\trte_memcpy(&vq->async_buffers_packed[to],\n+\t\t\t\t\t&vq->shadow_used_packed[from],\n+\t\t\t\t\tnum_buffers *\n+\t\t\t\t\tsizeof(struct vring_used_elem_packed));\n+\t\t\t} else {\n+\t\t\t\tint size = vq->size - to;\n+\n+\t\t\t\trte_memcpy(&vq->async_buffers_packed[to],\n+\t\t\t\t\t&vq->shadow_used_packed[from],\n+\t\t\t\t\tsize *\n+\t\t\t\t\tsizeof(struct vring_used_elem_packed));\n+\t\t\t\trte_memcpy(vq->async_buffers_packed,\n+\t\t\t\t\t&vq->shadow_used_packed[from +\n+\t\t\t\t\tsize], (num_buffers - size) *\n+\t\t\t\t\tsizeof(struct vring_used_elem_packed));\n+\t\t\t}\n+\t\t\tvq->async_packed_buffer_idx += num_buffers;\n+\t\t\tvq->shadow_used_idx -= num_buffers;\n+\t\t} else\n+\t\t\tcomp_pkts[num_done_pkts++] = pkts[pkt_idx];\n+\n+\t\tvq_inc_last_avail_packed(vq, num_desc);\n+\n+\t\t/*\n+\t\t * conditions to trigger async device transfer:\n+\t\t * - buffered packet number reaches transfer threshold\n+\t\t * - unused async iov number is less than max vhost vector\n+\t\t */\n+\t\tif (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||\n+\t\t\t((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <\n+\t\t\tBUF_VECTOR_MAX))) {\n+\t\t\tn_pkts = vq->async_ops.transfer_data(dev->vid,\n+\t\t\t\t\tqueue_id, tdes, 0, pkt_burst_idx);\n+\t\t\tsrc_iovec = vec_pool;\n+\t\t\tdst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);\n+\t\t\tsrc_it = it_pool;\n+\t\t\tdst_it = it_pool + 1;\n+\t\t\tsegs_await = 0;\n+\t\t\tvq->async_pkts_inflight_n += n_pkts;\n+\n+\t\t\tif (unlikely(n_pkts < pkt_burst_idx)) {\n+\t\t\t\t/*\n+\t\t\t\t * log error packets number here and do actual\n+\t\t\t\t * error processing when applications poll\n+\t\t\t\t * completion\n+\t\t\t\t */\n+\t\t\t\tpkt_err = pkt_burst_idx - n_pkts;\n+\t\t\t\tpkt_burst_idx = 0;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\n+\t\t\tpkt_burst_idx = 0;\n+\t\t}\n+\t}\n+\n+\tif (pkt_burst_idx) {\n+\t\tn_pkts = vq->async_ops.transfer_data(dev->vid,\n+\t\t\t\tqueue_id, tdes, 0, pkt_burst_idx);\n+\t\tvq->async_pkts_inflight_n += n_pkts;\n+\n+\t\tif (unlikely(n_pkts < pkt_burst_idx))\n+\t\t\tpkt_err = pkt_burst_idx - n_pkts;\n+\t}\n+\n+\tdo_data_copy_enqueue(dev, vq);\n+\n+\tif (unlikely(pkt_err)) {\n+\t\tuint16_t num_buffers = 0;\n+\n+\t\tnum_async_pkts -= pkt_err;\n+\t\t/* calculate the sum of descriptors of DMA-error packets. */\n+\t\twhile (pkt_err-- > 0) {\n+\t\t\tnum_buffers +=\n+\t\t\t\tpkts_info[slot_idx & (vq->size - 1)].nr_buffers;\n+\t\t\tslot_idx--;\n+\t\t}\n+\t\tvq->async_packed_buffer_idx -= num_buffers;\n+\t\t/* recover shadow used ring and available ring */\n+\t\tvq->shadow_used_idx -= (vq->last_avail_idx -\n+\t\t\t\tasync_pkts_log[num_async_pkts].last_avail_idx -\n+\t\t\t\tnum_buffers);\n+\t\tvq->last_avail_idx =\n+\t\t\tasync_pkts_log[num_async_pkts].last_avail_idx;\n+\t\tpkt_idx = async_pkts_log[num_async_pkts].pkt_idx;\n+\t\tnum_done_pkts = pkt_idx - num_async_pkts;\n+\t}\n+\n+\tvq->async_pkts_idx += num_async_pkts;\n+\t*comp_count = num_done_pkts;\n+\n+\tif (likely(vq->shadow_used_idx)) {\n+\t\tvhost_flush_enqueue_shadow_packed(dev, vq);\n+\t\tvhost_vring_call_packed(dev, vq);\n+\t}\n+\n+\treturn pkt_idx;\n+}\n+\n+static __rte_always_inline void\n+vhost_update_used_packed(struct virtio_net *dev,\n+\t\t\t\t  struct vhost_virtqueue *vq,\n+\t\t\t\t  struct vring_used_elem_packed *shadow_ring,\n+\t\t\t\t  uint16_t count)\n+{\n+\tif (count == 0)\n+\t\treturn;\n+\tint i;\n+\tuint16_t used_idx = vq->last_used_idx;\n+\tuint16_t head_idx = vq->last_used_idx;\n+\tuint16_t head_flags = 0;\n+\n+\t/* Split loop in two to save memory barriers */\n+\tfor (i = 0; i < count; i++) {\n+\t\tvq->desc_packed[used_idx].id = shadow_ring[i].id;\n+\t\tvq->desc_packed[used_idx].len = shadow_ring[i].len;\n+\n+\t\tused_idx += shadow_ring[i].count;\n+\t\tif (used_idx >= vq->size)\n+\t\t\tused_idx -= vq->size;\n+\t}\n+\n+\t/* The ordering for storing desc flags needs to be enforced. */\n+\trte_atomic_thread_fence(__ATOMIC_RELEASE);\n+\n+\tfor (i = 0; i < count; i++) {\n+\t\tuint16_t flags;\n+\n+\t\tif (vq->shadow_used_packed[i].len)\n+\t\t\tflags = VRING_DESC_F_WRITE;\n+\t\telse\n+\t\t\tflags = 0;\n+\n+\t\tif (vq->used_wrap_counter) {\n+\t\t\tflags |= VRING_DESC_F_USED;\n+\t\t\tflags |= VRING_DESC_F_AVAIL;\n+\t\t} else {\n+\t\t\tflags &= ~VRING_DESC_F_USED;\n+\t\t\tflags &= ~VRING_DESC_F_AVAIL;\n+\t\t}\n+\n+\t\tif (i > 0) {\n+\t\t\tvq->desc_packed[vq->last_used_idx].flags = flags;\n+\n+\t\t\tvhost_log_cache_used_vring(dev, vq,\n+\t\t\t\t\tvq->last_used_idx *\n+\t\t\t\t\tsizeof(struct vring_packed_desc),\n+\t\t\t\t\tsizeof(struct vring_packed_desc));\n+\t\t} else {\n+\t\t\thead_idx = vq->last_used_idx;\n+\t\t\thead_flags = flags;\n+\t\t}\n+\n+\t\tvq_inc_last_used_packed(vq, shadow_ring[i].count);\n+\t}\n+\n+\tvq->desc_packed[head_idx].flags = head_flags;\n+\n+\tvhost_log_cache_used_vring(dev, vq,\n+\t\t\t\thead_idx *\n+\t\t\t\tsizeof(struct vring_packed_desc),\n+\t\t\t\tsizeof(struct vring_packed_desc));\n+\n+\tvhost_log_cache_sync(dev, vq);\n+}\n+\n uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \t\tstruct rte_mbuf **pkts, uint16_t count)\n {\n \tstruct virtio_net *dev = get_device(vid);\n \tstruct vhost_virtqueue *vq;\n-\tuint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0;\n+\tuint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;\n \tuint16_t start_idx, pkts_idx, vq_size;\n \tstruct async_inflight_info *pkts_info;\n \tuint16_t from, i;\n@@ -1680,53 +2021,98 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,\n \t\tgoto done;\n \t}\n \n-\tfor (i = 0; i < n_pkts_put; i++) {\n-\t\tfrom = (start_idx + i) & (vq_size - 1);\n-\t\tn_descs += pkts_info[from].descs;\n-\t\tpkts[i] = pkts_info[from].mbuf;\n+\tif (vq_is_packed(dev)) {\n+\t\tfor (i = 0; i < n_pkts_put; i++) {\n+\t\t\tfrom = (start_idx + i) & (vq_size - 1);\n+\t\t\tn_buffers += pkts_info[from].nr_buffers;\n+\t\t\tpkts[i] = pkts_info[from].mbuf;\n+\t\t}\n+\t} else {\n+\t\tfor (i = 0; i < n_pkts_put; i++) {\n+\t\t\tfrom = (start_idx + i) & (vq_size - 1);\n+\t\t\tn_descs += pkts_info[from].descs;\n+\t\t\tpkts[i] = pkts_info[from].mbuf;\n+\t\t}\n \t}\n+\n \tvq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;\n \tvq->async_pkts_inflight_n -= n_pkts_put;\n \n \tif (likely(vq->enabled && vq->access_ok)) {\n-\t\tuint16_t nr_left = n_descs;\n \t\tuint16_t nr_copy;\n \t\tuint16_t to;\n \n \t\t/* write back completed descriptors to used ring */\n-\t\tdo {\n-\t\t\tfrom = vq->last_async_desc_idx & (vq->size - 1);\n-\t\t\tnr_copy = nr_left + from <= vq->size ? nr_left :\n-\t\t\t\tvq->size - from;\n-\t\t\tto = vq->last_used_idx & (vq->size - 1);\n-\n-\t\t\tif (to + nr_copy <= vq->size) {\n-\t\t\t\trte_memcpy(&vq->used->ring[to],\n+\t\tif (vq_is_packed(dev)) {\n+\t\t\tuint16_t nr_left = n_buffers;\n+\t\t\tuint16_t to;\n+\t\t\tdo {\n+\t\t\t\tto = vq->async_packed_buffer_idx &\n+\t\t\t\t\t\t\t\t(vq->size - 1);\n+\t\t\t\tfrom = vq->last_async_buffer_idx &\n+\t\t\t\t\t\t\t\t(vq->size - 1);\n+\t\t\t\tif (to == from)\n+\t\t\t\t\tbreak;\n+\t\t\t\tif (to > from) {\n+\t\t\t\t\tvhost_update_used_packed(dev, vq,\n+\t\t\t\t\t\tvq->async_buffers_packed + from,\n+\t\t\t\t\t\tto - from);\n+\t\t\t\t\tvq->last_async_buffer_idx += to - from;\n+\t\t\t\t\tnr_left -= to - from;\n+\t\t\t\t} else {\n+\t\t\t\t\tvhost_update_used_packed(dev, vq,\n+\t\t\t\t\t\tvq->async_buffers_packed + from,\n+\t\t\t\t\t\tvq->size - from);\n+\t\t\t\t\tvq->last_async_buffer_idx +=\n+\t\t\t\t\t\t\t\tvq->size - from;\n+\t\t\t\t\tnr_left -= vq->size - from;\n+\t\t\t\t}\n+\t\t\t} while (nr_left > 0);\n+\t\t\tvhost_vring_call_packed(dev, vq);\n+\t\t} else {\n+\t\t\tuint16_t nr_left = n_descs;\n+\t\t\tdo {\n+\t\t\t\tfrom = vq->last_async_desc_idx & (vq->size - 1);\n+\t\t\t\tnr_copy = nr_left + from <= vq->size ? nr_left :\n+\t\t\t\t\tvq->size - from;\n+\t\t\t\tto = vq->last_used_idx & (vq->size - 1);\n+\n+\t\t\t\tif (to + nr_copy <= vq->size) {\n+\t\t\t\t\trte_memcpy(&vq->used->ring[to],\n \t\t\t\t\t\t&vq->async_descs_split[from],\n \t\t\t\t\t\tnr_copy *\n \t\t\t\t\t\tsizeof(struct vring_used_elem));\n-\t\t\t} else {\n-\t\t\t\tuint16_t size = vq->size - to;\n+\t\t\t\t} else {\n+\t\t\t\t\tuint16_t size = vq->size - to;\n \n-\t\t\t\trte_memcpy(&vq->used->ring[to],\n+\t\t\t\t\trte_memcpy(&vq->used->ring[to],\n \t\t\t\t\t\t&vq->async_descs_split[from],\n \t\t\t\t\t\tsize *\n \t\t\t\t\t\tsizeof(struct vring_used_elem));\n-\t\t\t\trte_memcpy(vq->used->ring,\n+\t\t\t\t\trte_memcpy(vq->used->ring,\n \t\t\t\t\t\t&vq->async_descs_split[from +\n \t\t\t\t\t\tsize], (nr_copy - size) *\n \t\t\t\t\t\tsizeof(struct vring_used_elem));\n-\t\t\t}\n+\t\t\t\t}\n+\n+\t\t\t\tvq->last_async_desc_idx += nr_copy;\n+\t\t\t\tvq->last_used_idx += nr_copy;\n+\t\t\t\tnr_left -= nr_copy;\n+\t\t\t} while (nr_left > 0);\n+\n+\t\t\t__atomic_add_fetch(&vq->used->idx, n_descs,\n+\t\t\t\t\t__ATOMIC_RELEASE);\n+\t\t\tvhost_vring_call_split(dev, vq);\n+\t\t}\n \n-\t\t\tvq->last_async_desc_idx += nr_copy;\n-\t\t\tvq->last_used_idx += nr_copy;\n-\t\t\tnr_left -= nr_copy;\n-\t\t} while (nr_left > 0);\n \n-\t\t__atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE);\n-\t\tvhost_vring_call_split(dev, vq);\n-\t} else\n-\t\tvq->last_async_desc_idx += n_descs;\n+\n+\t} else {\n+\t\tif (vq_is_packed(dev))\n+\t\t\tvq->last_async_buffer_idx += n_buffers;\n+\t\telse\n+\t\t\tvq->last_async_desc_idx += n_descs;\n+\t}\n \n done:\n \trte_spinlock_unlock(&vq->access_lock);\n@@ -1767,9 +2153,10 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,\n \tif (count == 0)\n \t\tgoto out;\n \n-\t/* TODO: packed queue not implemented */\n \tif (vq_is_packed(dev))\n-\t\tnb_tx = 0;\n+\t\tnb_tx = virtio_dev_rx_async_submit_packed(dev,\n+\t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n+\t\t\t\tcomp_count);\n \telse\n \t\tnb_tx = virtio_dev_rx_async_submit_split(dev,\n \t\t\t\tvq, queue_id, pkts, count, comp_pkts,\n",
    "prefixes": []
}