get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.
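
A minimal sketch, not part of the Patchwork output below, of fetching this resource with stock Python. The URL comes from the response that follows; the timeout value is an arbitrary choice, and the assumption is that a read-only GET needs no authentication on this public instance:

import json
from urllib.request import urlopen

# Fetch the patch resource shown below (read-only, unauthenticated GET).
url = "http://patchwork.dpdk.org/api/patches/92486/"
with urlopen(url, timeout=30) as resp:
    patch = json.load(resp)

# A couple of fields present in the response below.
print(patch["name"], patch["state"])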

GET /api/patches/92486/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 92486,
    "url": "http://patchwork.dpdk.org/api/patches/92486/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20210430111727.12203-11-bruce.richardson@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210430111727.12203-11-bruce.richardson@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210430111727.12203-11-bruce.richardson@intel.com",
    "date": "2021-04-30T11:17:25",
    "name": "[v3,10/12] raw/ioat: rework SW ring layout",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "9f81629bdbe4d0f3ec35f9a7b6d484ff9da90346",
    "submitter": {
        "id": 20,
        "url": "http://patchwork.dpdk.org/api/people/20/?format=api",
        "name": "Bruce Richardson",
        "email": "bruce.richardson@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20210430111727.12203-11-bruce.richardson@intel.com/mbox/",
    "series": [
        {
            "id": 16773,
            "url": "http://patchwork.dpdk.org/api/series/16773/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=16773",
            "date": "2021-04-30T11:17:15",
            "name": "ioat driver updates",
            "version": 3,
            "mbox": "http://patchwork.dpdk.org/series/16773/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/92486/comments/",
    "check": "success",
    "checks": "http://patchwork.dpdk.org/api/patches/92486/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 934A7A0546;\n\tFri, 30 Apr 2021 13:18:41 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 8C39A41196;\n\tFri, 30 Apr 2021 13:18:06 +0200 (CEST)",
            "from mga17.intel.com (mga17.intel.com [192.55.52.151])\n by mails.dpdk.org (Postfix) with ESMTP id E041441200\n for <dev@dpdk.org>; Fri, 30 Apr 2021 13:18:04 +0200 (CEST)",
            "from orsmga003.jf.intel.com ([10.7.209.27])\n by fmsmga107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 30 Apr 2021 04:18:04 -0700",
            "from silpixa00399126.ir.intel.com ([10.237.223.78])\n by orsmga003.jf.intel.com with ESMTP; 30 Apr 2021 04:18:02 -0700"
        ],
        "IronPort-SDR": [
            "\n oZm+mtBybe/SQnv32rMLszWKSRXbVvsWfZ2eyBtXZw8VwyDHSAS7ZAEz/vvFgxlDhXlCAnXAVh\n XWP9FWIT8xQA==",
            "\n 34HrjtXyJ85dQEjSvLG0ntMd7eTxiGdgWNxAPA4nCJ4JHZw0zkr6POUCjGWq3vlwq4+P/a+jCh\n 0ZQ8fxPBAENw=="
        ],
        "X-IronPort-AV": [
            "E=McAfee;i=\"6200,9189,9969\"; a=\"177410708\"",
            "E=Sophos;i=\"5.82,262,1613462400\"; d=\"scan'208\";a=\"177410708\"",
            "E=Sophos;i=\"5.82,262,1613462400\"; d=\"scan'208\";a=\"387325151\""
        ],
        "X-ExtLoop1": "1",
        "From": "Bruce Richardson <bruce.richardson@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "kevin.laatz@intel.com, sunil.pai.g@intel.com, jiayu.hu@intel.com,\n Bruce Richardson <bruce.richardson@intel.com>",
        "Date": "Fri, 30 Apr 2021 12:17:25 +0100",
        "Message-Id": "<20210430111727.12203-11-bruce.richardson@intel.com>",
        "X-Mailer": "git-send-email 2.30.2",
        "In-Reply-To": "<20210430111727.12203-1-bruce.richardson@intel.com>",
        "References": "<20210318182042.43658-1-bruce.richardson@intel.com>\n <20210430111727.12203-1-bruce.richardson@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH v3 10/12] raw/ioat: rework SW ring layout",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "The ring management in the idxd part of the driver is more complex than\nit needs to be, tracking individual batches in a ring and having null\ndescriptors as padding to avoid having single-operation batches. This can\nbe simplified by using a regular ring-based layout, with additional\noverflow at the end to ensure that the one does not need to wrap within a\nbatch.\n\nSigned-off-by: Bruce Richardson <bruce.richardson@intel.com>\n---\n drivers/raw/ioat/idxd_pci.c            |   5 +-\n drivers/raw/ioat/ioat_common.c         |  99 +++++------\n drivers/raw/ioat/ioat_rawdev_test.c    |   1 +\n drivers/raw/ioat/rte_idxd_rawdev_fns.h | 233 +++++++++++++------------\n 4 files changed, 179 insertions(+), 159 deletions(-)",
    "diff": "diff --git a/drivers/raw/ioat/idxd_pci.c b/drivers/raw/ioat/idxd_pci.c\nindex b48e565b4c..13515dbc6c 100644\n--- a/drivers/raw/ioat/idxd_pci.c\n+++ b/drivers/raw/ioat/idxd_pci.c\n@@ -90,7 +90,7 @@ idxd_pci_dev_start(struct rte_rawdev *dev)\n \t\treturn 0;\n \t}\n \n-\tif (idxd->public.batch_ring == NULL) {\n+\tif (idxd->public.desc_ring == NULL) {\n \t\tIOAT_PMD_ERR(\"WQ %d has not been fully configured\", idxd->qid);\n \t\treturn -EINVAL;\n \t}\n@@ -337,7 +337,8 @@ idxd_rawdev_destroy(const char *name)\n \t/* free device memory */\n \tIOAT_PMD_DEBUG(\"Freeing device driver memory\");\n \trdev->dev_private = NULL;\n-\trte_free(idxd->public.batch_ring);\n+\trte_free(idxd->public.batch_idx_ring);\n+\trte_free(idxd->public.desc_ring);\n \trte_free(idxd->public.hdl_ring);\n \trte_memzone_free(idxd->mz);\n \ndiff --git a/drivers/raw/ioat/ioat_common.c b/drivers/raw/ioat/ioat_common.c\nindex d055c36a2a..fcb30572e6 100644\n--- a/drivers/raw/ioat/ioat_common.c\n+++ b/drivers/raw/ioat/ioat_common.c\n@@ -84,21 +84,21 @@ idxd_dev_dump(struct rte_rawdev *dev, FILE *f)\n \tfprintf(f, \"Driver: %s\\n\\n\", dev->driver_name);\n \n \tfprintf(f, \"Portal: %p\\n\", rte_idxd->portal);\n-\tfprintf(f, \"Batch Ring size: %u\\n\", rte_idxd->batch_ring_sz);\n-\tfprintf(f, \"Comp Handle Ring size: %u\\n\\n\", rte_idxd->hdl_ring_sz);\n-\n-\tfprintf(f, \"Next batch: %u\\n\", rte_idxd->next_batch);\n-\tfprintf(f, \"Next batch to be completed: %u\\n\", rte_idxd->next_completed);\n-\tfor (i = 0; i < rte_idxd->batch_ring_sz; i++) {\n-\t\tstruct rte_idxd_desc_batch *b = &rte_idxd->batch_ring[i];\n-\t\tfprintf(f, \"Batch %u @%p: submitted=%u, op_count=%u, hdl_end=%u\\n\",\n-\t\t\t\ti, b, b->submitted, b->op_count, b->hdl_end);\n-\t}\n-\n-\tfprintf(f, \"\\n\");\n-\tfprintf(f, \"Next free hdl: %u\\n\", rte_idxd->next_free_hdl);\n-\tfprintf(f, \"Last completed hdl: %u\\n\", rte_idxd->last_completed_hdl);\n-\tfprintf(f, \"Next returned hdl: %u\\n\", rte_idxd->next_ret_hdl);\n+\tfprintf(f, \"Config: {ring_size: %u, hdls_disable: %u}\\n\\n\",\n+\t\t\trte_idxd->cfg.ring_size, rte_idxd->cfg.hdls_disable);\n+\n+\tfprintf(f, \"max batches: %u\\n\", rte_idxd->max_batches);\n+\tfprintf(f, \"batch idx read: %u\\n\", rte_idxd->batch_idx_read);\n+\tfprintf(f, \"batch idx write: %u\\n\", rte_idxd->batch_idx_write);\n+\tfprintf(f, \"batch idxes:\");\n+\tfor (i = 0; i < rte_idxd->max_batches + 1; i++)\n+\t\tfprintf(f, \"%u \", rte_idxd->batch_idx_ring[i]);\n+\tfprintf(f, \"\\n\\n\");\n+\n+\tfprintf(f, \"hdls read: %u\\n\", rte_idxd->max_batches);\n+\tfprintf(f, \"hdls avail: %u\\n\", rte_idxd->hdls_avail);\n+\tfprintf(f, \"batch start: %u\\n\", rte_idxd->batch_start);\n+\tfprintf(f, \"batch size: %u\\n\", rte_idxd->batch_size);\n \n \treturn 0;\n }\n@@ -114,10 +114,8 @@ idxd_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info,\n \tif (info_size != sizeof(*cfg))\n \t\treturn -EINVAL;\n \n-\tif (cfg != NULL) {\n-\t\tcfg->ring_size = rte_idxd->hdl_ring_sz;\n-\t\tcfg->hdls_disable = rte_idxd->hdls_disable;\n-\t}\n+\tif (cfg != NULL)\n+\t\t*cfg = rte_idxd->cfg;\n \treturn 0;\n }\n \n@@ -129,8 +127,6 @@ idxd_dev_configure(const struct rte_rawdev *dev,\n \tstruct rte_idxd_rawdev *rte_idxd = &idxd->public;\n \tstruct rte_ioat_rawdev_config *cfg = config;\n \tuint16_t max_desc = cfg->ring_size;\n-\tuint16_t max_batches = max_desc / BATCH_SIZE;\n-\tuint16_t i;\n \n \tif (config_size != sizeof(*cfg))\n \t\treturn -EINVAL;\n@@ -140,47 +136,34 @@ idxd_dev_configure(const struct rte_rawdev *dev,\n \t\treturn -EAGAIN;\n \t}\n 
\n-\trte_idxd->hdls_disable = cfg->hdls_disable;\n+\trte_idxd->cfg = *cfg;\n \n-\t/* limit the batches to what can be stored in hardware */\n-\tif (max_batches > idxd->max_batches) {\n-\t\tIOAT_PMD_DEBUG(\"Ring size of %u is too large for this device, need to limit to %u batches of %u\",\n-\t\t\t\tmax_desc, idxd->max_batches, BATCH_SIZE);\n-\t\tmax_batches = idxd->max_batches;\n-\t\tmax_desc = max_batches * BATCH_SIZE;\n-\t}\n \tif (!rte_is_power_of_2(max_desc))\n \t\tmax_desc = rte_align32pow2(max_desc);\n-\tIOAT_PMD_DEBUG(\"Rawdev %u using %u descriptors in %u batches\",\n-\t\t\tdev->dev_id, max_desc, max_batches);\n+\tIOAT_PMD_DEBUG(\"Rawdev %u using %u descriptors\",\n+\t\t\tdev->dev_id, max_desc);\n+\trte_idxd->desc_ring_mask = max_desc - 1;\n \n \t/* in case we are reconfiguring a device, free any existing memory */\n-\trte_free(rte_idxd->batch_ring);\n+\trte_free(rte_idxd->desc_ring);\n \trte_free(rte_idxd->hdl_ring);\n \n-\trte_idxd->batch_ring = rte_zmalloc(NULL,\n-\t\t\tsizeof(*rte_idxd->batch_ring) * max_batches, 0);\n-\tif (rte_idxd->batch_ring == NULL)\n+\t/* allocate the descriptor ring at 2x size as batches can't wrap */\n+\trte_idxd->desc_ring = rte_zmalloc(NULL,\n+\t\t\tsizeof(*rte_idxd->desc_ring) * max_desc * 2, 0);\n+\tif (rte_idxd->desc_ring == NULL)\n \t\treturn -ENOMEM;\n+\trte_idxd->desc_iova = rte_mem_virt2iova(rte_idxd->desc_ring);\n \n \trte_idxd->hdl_ring = rte_zmalloc(NULL,\n \t\t\tsizeof(*rte_idxd->hdl_ring) * max_desc, 0);\n \tif (rte_idxd->hdl_ring == NULL) {\n-\t\trte_free(rte_idxd->batch_ring);\n-\t\trte_idxd->batch_ring = NULL;\n+\t\trte_free(rte_idxd->desc_ring);\n+\t\trte_idxd->desc_ring = NULL;\n \t\treturn -ENOMEM;\n \t}\n-\trte_idxd->batch_ring_sz = max_batches;\n-\trte_idxd->hdl_ring_sz = max_desc;\n-\n-\tfor (i = 0; i < rte_idxd->batch_ring_sz; i++) {\n-\t\tstruct rte_idxd_desc_batch *b = &rte_idxd->batch_ring[i];\n-\t\tb->batch_desc.completion = rte_mem_virt2iova(&b->comp);\n-\t\tb->batch_desc.desc_addr = rte_mem_virt2iova(&b->null_desc);\n-\t\tb->batch_desc.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |\n-\t\t\t\tIDXD_FLAG_COMPLETION_ADDR_VALID |\n-\t\t\t\tIDXD_FLAG_REQUEST_COMPLETION;\n-\t}\n+\trte_idxd->hdls_read = rte_idxd->batch_start = 0;\n+\trte_idxd->batch_size = 0;\n \n \treturn 0;\n }\n@@ -191,6 +174,7 @@ idxd_rawdev_create(const char *name, struct rte_device *dev,\n \t\t   const struct rte_rawdev_ops *ops)\n {\n \tstruct idxd_rawdev *idxd;\n+\tstruct rte_idxd_rawdev *public;\n \tstruct rte_rawdev *rawdev = NULL;\n \tconst struct rte_memzone *mz = NULL;\n \tchar mz_name[RTE_MEMZONE_NAMESIZE];\n@@ -245,13 +229,30 @@ idxd_rawdev_create(const char *name, struct rte_device *dev,\n \n \tidxd = rawdev->dev_private;\n \t*idxd = *base_idxd; /* copy over the main fields already passed in */\n-\tidxd->public.type = RTE_IDXD_DEV;\n \tidxd->rawdev = rawdev;\n \tidxd->mz = mz;\n \n+\tpublic = &idxd->public;\n+\tpublic->type = RTE_IDXD_DEV;\n+\tpublic->max_batches = idxd->max_batches;\n+\tpublic->batch_idx_read = 0;\n+\tpublic->batch_idx_write = 0;\n+\t/* allocate batch index ring. 
The +1 is because we can never fully use\n+\t * the ring, otherwise read == write means both full and empty.\n+\t */\n+\tpublic->batch_idx_ring = rte_zmalloc(NULL,\n+\t\t\tsizeof(uint16_t) * (idxd->max_batches + 1), 0);\n+\tif (public->batch_idx_ring == NULL) {\n+\t\tIOAT_PMD_ERR(\"Unable to reserve memory for batch data\\n\");\n+\t\tret = -ENOMEM;\n+\t\tgoto cleanup;\n+\t}\n+\n \treturn 0;\n \n cleanup:\n+\tif (mz)\n+\t\trte_memzone_free(mz);\n \tif (rawdev)\n \t\trte_rawdev_pmd_release(rawdev);\n \ndiff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c\nindex a5064d739d..51eebe152f 100644\n--- a/drivers/raw/ioat/ioat_rawdev_test.c\n+++ b/drivers/raw/ioat/ioat_rawdev_test.c\n@@ -206,6 +206,7 @@ test_enqueue_copies(int dev_id)\n \t\tif (rte_ioat_completed_ops(dev_id, max_completions, (void *)&completed[0],\n \t\t\t\t(void *)&completed[max_completions]) != max_ops) {\n \t\t\tPRINT_ERR(\"Error with rte_ioat_completed_ops\\n\");\n+\t\t\trte_rawdev_dump(dev_id, stdout);\n \t\t\treturn -1;\n \t\t}\n \t\tif (completed[0] != src || completed[max_completions] != dst) {\ndiff --git a/drivers/raw/ioat/rte_idxd_rawdev_fns.h b/drivers/raw/ioat/rte_idxd_rawdev_fns.h\nindex c2a12ebef0..4c49d2b84a 100644\n--- a/drivers/raw/ioat/rte_idxd_rawdev_fns.h\n+++ b/drivers/raw/ioat/rte_idxd_rawdev_fns.h\n@@ -7,7 +7,7 @@\n #include <stdint.h>\n \n /*\n- * Defines used in the data path for interacting with hardware.\n+ * Defines used in the data path for interacting with IDXD hardware.\n  */\n #define IDXD_CMD_OP_SHIFT 24\n enum rte_idxd_ops {\n@@ -67,26 +67,6 @@ struct rte_idxd_completion {\n \tuint32_t invalid_flags;\n } __rte_aligned(32);\n \n-#define BATCH_SIZE 64\n-\n-/**\n- * Structure used inside the driver for building up and submitting\n- * a batch of operations to the DSA hardware.\n- */\n-struct rte_idxd_desc_batch {\n-\tstruct rte_idxd_completion comp; /* the completion record for batch */\n-\n-\tuint16_t submitted;\n-\tuint16_t op_count;\n-\tuint16_t hdl_end;\n-\n-\tstruct rte_idxd_hw_desc batch_desc;\n-\n-\t/* batches must always have 2 descriptors, so put a null at the start */\n-\tstruct rte_idxd_hw_desc null_desc;\n-\tstruct rte_idxd_hw_desc ops[BATCH_SIZE];\n-};\n-\n /**\n  * structure used to save the \"handles\" provided by the user to be\n  * returned to the user on job completion.\n@@ -106,51 +86,65 @@ struct rte_idxd_rawdev {\n \n \tvoid *portal; /* address to write the batch descriptor */\n \n-\t/* counters to track the batches and the individual op handles */\n-\tuint16_t batch_ring_sz;  /* size of batch ring */\n-\tuint16_t hdl_ring_sz;    /* size of the user hdl ring */\n+\tstruct rte_ioat_rawdev_config cfg;\n+\trte_iova_t desc_iova; /* base address of desc ring, needed for completions */\n \n-\tuint16_t next_batch;     /* where we write descriptor ops */\n-\tuint16_t next_completed; /* batch where we read completions */\n-\tuint16_t next_ret_hdl;   /* the next user hdl to return */\n-\tuint16_t last_completed_hdl; /* the last user hdl that has completed */\n-\tuint16_t next_free_hdl;  /* where the handle for next op will go */\n-\tuint16_t hdls_disable;   /* disable tracking completion handles */\n+\t/* counters to track the batches */\n+\tunsigned short max_batches;\n+\tunsigned short batch_idx_read;\n+\tunsigned short batch_idx_write;\n+\tunsigned short *batch_idx_ring; /* store where each batch ends */\n \n+\t/* track descriptors and handles */\n+\tunsigned short desc_ring_mask;\n+\tunsigned short hdls_avail; /* handles for ops completed */\n+\tunsigned 
short hdls_read; /* the read pointer for hdls/desc rings */\n+\tunsigned short batch_start; /* start+size == write pointer for hdls/desc */\n+\tunsigned short batch_size;\n+\n+\tstruct rte_idxd_hw_desc *desc_ring;\n \tstruct rte_idxd_user_hdl *hdl_ring;\n-\tstruct rte_idxd_desc_batch *batch_ring;\n };\n \n+static __rte_always_inline rte_iova_t\n+__desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n)\n+{\n+\treturn idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc));\n+}\n+\n static __rte_always_inline int\n-__idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc,\n+__idxd_write_desc(int dev_id,\n+\t\tconst uint32_t op_flags,\n+\t\tconst rte_iova_t src,\n+\t\tconst rte_iova_t dst,\n+\t\tconst uint32_t size,\n \t\tconst struct rte_idxd_user_hdl *hdl)\n {\n \tstruct rte_idxd_rawdev *idxd =\n \t\t\t(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;\n-\tstruct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch];\n+\tuint16_t write_idx = idxd->batch_start + idxd->batch_size;\n \n-\t/* check for room in the handle ring */\n-\tif (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl)\n+\t/* first check batch ring space then desc ring space */\n+\tif ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||\n+\t\t\tidxd->batch_idx_write + 1 == idxd->batch_idx_read)\n \t\tgoto failed;\n-\n-\t/* check for space in current batch */\n-\tif (b->op_count >= BATCH_SIZE)\n-\t\tgoto failed;\n-\n-\t/* check that we can actually use the current batch */\n-\tif (b->submitted)\n+\tif (((write_idx + 1) & idxd->desc_ring_mask) == idxd->hdls_read)\n \t\tgoto failed;\n \n-\t/* write the descriptor */\n-\tb->ops[b->op_count++] = *desc;\n+\t/* write desc and handle. Note, descriptors don't wrap */\n+\tidxd->desc_ring[write_idx].pasid = 0;\n+\tidxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;\n+\tidxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx);\n+\tidxd->desc_ring[write_idx].src = src;\n+\tidxd->desc_ring[write_idx].dst = dst;\n+\tidxd->desc_ring[write_idx].size = size;\n \n-\t/* store the completion details */\n-\tif (!idxd->hdls_disable)\n-\t\tidxd->hdl_ring[idxd->next_free_hdl] = *hdl;\n-\tif (++idxd->next_free_hdl == idxd->hdl_ring_sz)\n-\t\tidxd->next_free_hdl = 0;\n+\tidxd->hdl_ring[write_idx & idxd->desc_ring_mask] = *hdl;\n+\tidxd->batch_size++;\n \n \tidxd->xstats.enqueued++;\n+\n+\trte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);\n \treturn 1;\n \n failed:\n@@ -163,53 +157,42 @@ static __rte_always_inline int\n __idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,\n \t\tunsigned int length, uintptr_t dst_hdl)\n {\n-\tconst struct rte_idxd_hw_desc desc = {\n-\t\t\t.op_flags =  (idxd_op_fill << IDXD_CMD_OP_SHIFT) |\n-\t\t\t\tIDXD_FLAG_CACHE_CONTROL,\n-\t\t\t.src = pattern,\n-\t\t\t.dst = dst,\n-\t\t\t.size = length\n-\t};\n \tconst struct rte_idxd_user_hdl hdl = {\n \t\t\t.dst = dst_hdl\n \t};\n-\treturn __idxd_write_desc(dev_id, &desc, &hdl);\n+\treturn __idxd_write_desc(dev_id,\n+\t\t\t(idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,\n+\t\t\tpattern, dst, length, &hdl);\n }\n \n static __rte_always_inline int\n __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,\n \t\tunsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)\n {\n-\tconst struct rte_idxd_hw_desc desc = {\n-\t\t\t.op_flags =  (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |\n-\t\t\t\tIDXD_FLAG_CACHE_CONTROL,\n-\t\t\t.src = src,\n-\t\t\t.dst = dst,\n-\t\t\t.size = 
length\n-\t};\n \tconst struct rte_idxd_user_hdl hdl = {\n \t\t\t.src = src_hdl,\n \t\t\t.dst = dst_hdl\n \t};\n-\treturn __idxd_write_desc(dev_id, &desc, &hdl);\n+\treturn __idxd_write_desc(dev_id,\n+\t\t\t(idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,\n+\t\t\tsrc, dst, length, &hdl);\n }\n \n static __rte_always_inline int\n __idxd_fence(int dev_id)\n {\n-\tstatic const struct rte_idxd_hw_desc fence = {\n-\t\t\t.op_flags = IDXD_FLAG_FENCE\n-\t};\n \tstatic const struct rte_idxd_user_hdl null_hdl;\n-\treturn __idxd_write_desc(dev_id, &fence, &null_hdl);\n+\t/* only op field needs filling - zero src, dst and length */\n+\treturn __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, &null_hdl);\n }\n \n static __rte_always_inline void\n-__idxd_movdir64b(volatile void *dst, const void *src)\n+__idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src)\n {\n \tasm volatile (\".byte 0x66, 0x0f, 0x38, 0xf8, 0x02\"\n \t\t\t:\n-\t\t\t: \"a\" (dst), \"d\" (src));\n+\t\t\t: \"a\" (dst), \"d\" (src)\n+\t\t\t: \"memory\");\n }\n \n static __rte_always_inline int\n@@ -217,19 +200,49 @@ __idxd_perform_ops(int dev_id)\n {\n \tstruct rte_idxd_rawdev *idxd =\n \t\t\t(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;\n-\tstruct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch];\n+\t/* write completion to last desc in the batch */\n+\tuint16_t comp_idx = idxd->batch_start + idxd->batch_size - 1;\n+\tif (comp_idx > idxd->desc_ring_mask) {\n+\t\tcomp_idx &= idxd->desc_ring_mask;\n+\t\t*((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */\n+\t}\n \n-\tif (b->submitted || b->op_count == 0)\n+\tif (idxd->batch_size == 0)\n \t\treturn 0;\n-\tb->hdl_end = idxd->next_free_hdl;\n-\tb->comp.status = 0;\n-\tb->submitted = 1;\n-\tb->batch_desc.size = b->op_count + 1;\n-\t__idxd_movdir64b(idxd->portal, &b->batch_desc);\n-\n-\tif (++idxd->next_batch == idxd->batch_ring_sz)\n-\t\tidxd->next_batch = 0;\n-\tidxd->xstats.started = idxd->xstats.enqueued;\n+\n+\t_mm_sfence(); /* fence before writing desc to device */\n+\tif (idxd->batch_size > 1) {\n+\t\tstruct rte_idxd_hw_desc batch_desc = {\n+\t\t\t\t.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |\n+\t\t\t\t\tIDXD_FLAG_COMPLETION_ADDR_VALID |\n+\t\t\t\t\tIDXD_FLAG_REQUEST_COMPLETION,\n+\t\t\t\t.desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),\n+\t\t\t\t.completion = __desc_idx_to_iova(idxd, comp_idx),\n+\t\t\t\t.size = idxd->batch_size,\n+\t\t};\n+\n+\t\t__idxd_movdir64b(idxd->portal, &batch_desc);\n+\t} else {\n+\t\t/* special case batch size of 1, as not allowed by HW */\n+\t\t/* comp_idx == batch_start */\n+\t\tstruct rte_idxd_hw_desc *desc = &idxd->desc_ring[comp_idx];\n+\t\tdesc->op_flags |= IDXD_FLAG_COMPLETION_ADDR_VALID |\n+\t\t\t\tIDXD_FLAG_REQUEST_COMPLETION;\n+\t\tdesc->completion = __desc_idx_to_iova(idxd, comp_idx);\n+\n+\t\t__idxd_movdir64b(idxd->portal, desc);\n+\t}\n+\n+\tidxd->xstats.started += idxd->batch_size;\n+\n+\tidxd->batch_start += idxd->batch_size;\n+\tidxd->batch_start &= idxd->desc_ring_mask;\n+\tidxd->batch_size = 0;\n+\n+\tidxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;\n+\tif (idxd->batch_idx_write > idxd->max_batches)\n+\t\tidxd->batch_idx_write = 0;\n+\n \treturn 0;\n }\n \n@@ -239,35 +252,39 @@ __idxd_completed_ops(int dev_id, uint8_t max_ops,\n {\n \tstruct rte_idxd_rawdev *idxd =\n \t\t\t(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;\n-\tstruct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed];\n-\tuint16_t h_idx = 
idxd->next_ret_hdl;\n-\tint n = 0;\n-\n-\twhile (b->submitted && b->comp.status != 0) {\n-\t\tidxd->last_completed_hdl = b->hdl_end;\n-\t\tb->submitted = 0;\n-\t\tb->op_count = 0;\n-\t\tif (++idxd->next_completed == idxd->batch_ring_sz)\n-\t\t\tidxd->next_completed = 0;\n-\t\tb = &idxd->batch_ring[idxd->next_completed];\n+\tunsigned short n, h_idx;\n+\n+\twhile (idxd->batch_idx_read != idxd->batch_idx_write) {\n+\t\tuint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];\n+\t\tvolatile struct rte_idxd_completion *comp_to_chk =\n+\t\t\t\t(struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk];\n+\t\tif (comp_to_chk->status == 0)\n+\t\t\tbreak;\n+\t\t/* avail points to one after the last one written */\n+\t\tidxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;\n+\t\tidxd->batch_idx_read++;\n+\t\tif (idxd->batch_idx_read > idxd->max_batches)\n+\t\t\tidxd->batch_idx_read = 0;\n \t}\n \n-\tif (!idxd->hdls_disable)\n-\t\tfor (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) {\n-\t\t\tsrc_hdls[n] = idxd->hdl_ring[h_idx].src;\n-\t\t\tdst_hdls[n] = idxd->hdl_ring[h_idx].dst;\n-\t\t\tif (++h_idx == idxd->hdl_ring_sz)\n-\t\t\t\th_idx = 0;\n-\t\t}\n-\telse\n-\t\twhile (h_idx != idxd->last_completed_hdl) {\n-\t\t\tn++;\n-\t\t\tif (++h_idx == idxd->hdl_ring_sz)\n-\t\t\t\th_idx = 0;\n-\t\t}\n-\n-\tidxd->next_ret_hdl = h_idx;\n+\tif (idxd->cfg.hdls_disable) {\n+\t\tn = (idxd->hdls_avail < idxd->hdls_read) ?\n+\t\t\t\t(idxd->hdls_avail + idxd->desc_ring_mask + 1 - idxd->hdls_read) :\n+\t\t\t\t(idxd->hdls_avail - idxd->hdls_read);\n+\t\tidxd->hdls_read = idxd->hdls_avail;\n+\t\tgoto out;\n+\t}\n+\n+\tfor (n = 0, h_idx = idxd->hdls_read;\n+\t\t\tn < max_ops && h_idx != idxd->hdls_avail; n++) {\n+\t\tsrc_hdls[n] = idxd->hdl_ring[h_idx].src;\n+\t\tdst_hdls[n] = idxd->hdl_ring[h_idx].dst;\n+\t\tif (++h_idx > idxd->desc_ring_mask)\n+\t\t\th_idx = 0;\n+\t}\n+\tidxd->hdls_read = h_idx;\n \n+out:\n \tidxd->xstats.completed += n;\n \treturn n;\n }\n",
    "prefixes": [
        "v3",
        "10/12"
    ]
}
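
A hedged sketch of the write path described by the patch/put methods above: changing the patch state with an authenticated PATCH request. The token value is a placeholder, and token-based authorization plus maintainer rights on the project are assumptions rather than anything shown in the response:

import json
from urllib.request import Request, urlopen

# Placeholder token; state changes are only accepted from authorized accounts.
headers = {
    "Authorization": "Token 0123456789abcdef",
    "Content-Type": "application/json",
}
body = json.dumps({"state": "accepted"}).encode()

# PATCH the same resource that was fetched above.
req = Request("http://patchwork.dpdk.org/api/patches/92486/",
              data=body, headers=headers, method="PATCH")
with urlopen(req) as resp:
    print(resp.status, json.load(resp)["state"])

PUT is also listed in the Allow header above and would be constructed the same way; which fields each method accepts is determined by the server.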