get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/105406/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 105406,
    "url": "http://patchwork.dpdk.org/api/patches/105406/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20211224225923.806498-2-dharmik.thakkar@arm.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20211224225923.806498-2-dharmik.thakkar@arm.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20211224225923.806498-2-dharmik.thakkar@arm.com",
    "date": "2021-12-24T22:59:23",
    "name": "[1/1] mempool: implement index-based per core cache",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "f65f047b76cb3b29c8a9cec920bc375374ef44b5",
    "submitter": {
        "id": 1108,
        "url": "http://patchwork.dpdk.org/api/people/1108/?format=api",
        "name": "Dharmik Thakkar",
        "email": "dharmik.thakkar@arm.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20211224225923.806498-2-dharmik.thakkar@arm.com/mbox/",
    "series": [
        {
            "id": 21027,
            "url": "http://patchwork.dpdk.org/api/series/21027/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=21027",
            "date": "2021-12-24T22:59:23",
            "name": "mempool: implement index-based per core cache",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/21027/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/105406/comments/",
    "check": "warning",
    "checks": "http://patchwork.dpdk.org/api/patches/105406/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id A151AA0352;\n\tSat, 25 Dec 2021 00:00:13 +0100 (CET)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 3B0DB4067B;\n\tSat, 25 Dec 2021 00:00:13 +0100 (CET)",
            "from foss.arm.com (foss.arm.com [217.140.110.172])\n by mails.dpdk.org (Postfix) with ESMTP id 131884067B\n for <dev@dpdk.org>; Sat, 25 Dec 2021 00:00:11 +0100 (CET)",
            "from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])\n by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 74499ED1;\n Fri, 24 Dec 2021 15:00:10 -0800 (PST)",
            "from 2p2660v4-1.austin.arm.com (2p2660v4-1.austin.arm.com\n [10.118.13.211])\n by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 628D93F718;\n Fri, 24 Dec 2021 15:00:10 -0800 (PST)"
        ],
        "From": "Dharmik Thakkar <dharmik.thakkar@arm.com>",
        "To": "Olivier Matz <olivier.matz@6wind.com>,\n Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>",
        "Cc": "dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com,\n ruifeng.wang@arm.com, Dharmik Thakkar <dharmik.thakkar@arm.com>",
        "Subject": "[PATCH 1/1] mempool: implement index-based per core cache",
        "Date": "Fri, 24 Dec 2021 16:59:23 -0600",
        "Message-Id": "<20211224225923.806498-2-dharmik.thakkar@arm.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20211224225923.806498-1-dharmik.thakkar@arm.com>",
        "References": "<20210930172735.2675627-1-dharmik.thakkar@arm.com>\n <20211224225923.806498-1-dharmik.thakkar@arm.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "Current mempool per core cache implementation stores pointers to mbufs\nOn 64b architectures, each pointer consumes 8B\nThis patch replaces it with index-based implementation,\nwhere in each buffer is addressed by (pool base address + index)\nIt reduces the amount of memory/cache required for per core cache\n\nL3Fwd performance testing reveals minor improvements in the cache\nperformance (L1 and L2 misses reduced by 0.60%)\nwith no change in throughput\n\nSuggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>\nSigned-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>\nReviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>\n---\n lib/mempool/rte_mempool.h             | 114 +++++++++++++++++++++++++-\n lib/mempool/rte_mempool_ops_default.c |   7 ++\n 2 files changed, 119 insertions(+), 2 deletions(-)",
    "diff": "diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h\nindex 1e7a3c15273c..4fabd3b1920b 100644\n--- a/lib/mempool/rte_mempool.h\n+++ b/lib/mempool/rte_mempool.h\n@@ -50,6 +50,10 @@\n #include <rte_memcpy.h>\n #include <rte_common.h>\n \n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+#include <rte_vect.h>\n+#endif\n+\n #include \"rte_mempool_trace_fp.h\"\n \n #ifdef __cplusplus\n@@ -239,6 +243,9 @@ struct rte_mempool {\n \tint32_t ops_index;\n \n \tstruct rte_mempool_cache *local_cache; /**< Per-lcore local cache */\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tvoid *pool_base_value; /**< Base value to calculate indices */\n+#endif\n \n \tuint32_t populated_size;         /**< Number of populated objects. */\n \tstruct rte_mempool_objhdr_list elt_list; /**< List of objects in pool */\n@@ -1314,7 +1321,19 @@ rte_mempool_cache_flush(struct rte_mempool_cache *cache,\n \tif (cache == NULL || cache->len == 0)\n \t\treturn;\n \trte_mempool_trace_cache_flush(cache, mp);\n+\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tunsigned int i;\n+\tunsigned int cache_len = cache->len;\n+\tvoid *obj_table[RTE_MEMPOOL_CACHE_MAX_SIZE * 3];\n+\tvoid *base_value = mp->pool_base_value;\n+\tuint32_t *cache_objs = (uint32_t *) cache->objs;\n+\tfor (i = 0; i < cache_len; i++)\n+\t\tobj_table[i] = (void *) RTE_PTR_ADD(base_value, cache_objs[i]);\n+\trte_mempool_ops_enqueue_bulk(mp, obj_table, cache->len);\n+#else\n \trte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);\n+#endif\n \tcache->len = 0;\n }\n \n@@ -1334,8 +1353,13 @@ static __rte_always_inline void\n rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,\n \t\t\t   unsigned int n, struct rte_mempool_cache *cache)\n {\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tuint32_t *cache_objs;\n+\tvoid *base_value;\n+\tuint32_t i;\n+#else\n \tvoid **cache_objs;\n-\n+#endif\n \t/* increment stat now, adding in mempool always success */\n \tRTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1);\n \tRTE_MEMPOOL_STAT_ADD(mp, put_objs, n);\n@@ -1344,7 +1368,13 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,\n \tif (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))\n \t\tgoto ring_enqueue;\n \n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tcache_objs = (uint32_t *) cache->objs;\n+\tcache_objs = &cache_objs[cache->len];\n+\tbase_value = mp->pool_base_value;\n+#else\n \tcache_objs = &cache->objs[cache->len];\n+#endif\n \n \t/*\n \t * The cache follows the following algorithm\n@@ -1354,13 +1384,40 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,\n \t */\n \n \t/* Add elements back into the cache */\n+\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+#if defined __ARM_NEON\n+\tuint64x2_t v_obj_table;\n+\tuint64x2_t v_base_value = vdupq_n_u64((uint64_t)base_value);\n+\tuint32x2_t v_cache_objs;\n+\n+\tfor (i = 0; i < (n & ~0x1); i += 2) {\n+\t\tv_obj_table = vld1q_u64((const uint64_t *)&obj_table[i]);\n+\t\tv_cache_objs = vqmovn_u64(vsubq_u64(v_obj_table, v_base_value));\n+\t\tvst1_u32(cache_objs + i, v_cache_objs);\n+\t}\n+\tif (n & 0x1) {\n+\t\tcache_objs[i] = (uint32_t) RTE_PTR_DIFF(obj_table[i], base_value);\n+\t}\n+#else\n+\tfor (i = 0; i < n; i++) {\n+\t\tcache_objs[i] = (uint32_t) RTE_PTR_DIFF(obj_table[i], base_value);\n+\t}\n+#endif\n+#else\n \trte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);\n+#endif\n \n \tcache->len += n;\n \n \tif (cache->len >= cache->flushthresh) {\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\t\trte_mempool_ops_enqueue_bulk(mp, obj_table + cache->len - cache->size,\n+\t\t\t\tcache->len - cache->size);\n+#else\n \t\trte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],\n \t\t\t\tcache->len - cache->size);\n+#endif\n \t\tcache->len = cache->size;\n \t}\n \n@@ -1461,13 +1518,22 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,\n {\n \tint ret;\n \tuint32_t index, len;\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tuint32_t i;\n+\tuint32_t *cache_objs;\n+#else\n \tvoid **cache_objs;\n-\n+#endif\n \t/* No cache provided or cannot be satisfied from cache */\n \tif (unlikely(cache == NULL || n >= cache->size))\n \t\tgoto ring_dequeue;\n \n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\tvoid *base_value = mp->pool_base_value;\n+\tcache_objs = (uint32_t *) cache->objs;\n+#else\n \tcache_objs = cache->objs;\n+#endif\n \n \t/* Can this be satisfied from the cache? */\n \tif (cache->len < n) {\n@@ -1475,8 +1541,14 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,\n \t\tuint32_t req = n + (cache->size - cache->len);\n \n \t\t/* How many do we require i.e. number to fill the cache + the request */\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\t\tvoid *temp_objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */\n+\t\tret = rte_mempool_ops_dequeue_bulk(mp,\n+\t\t\ttemp_objs, req);\n+#else\n \t\tret = rte_mempool_ops_dequeue_bulk(mp,\n \t\t\t&cache->objs[cache->len], req);\n+#endif\n \t\tif (unlikely(ret < 0)) {\n \t\t\t/*\n \t\t\t * In the off chance that we are buffer constrained,\n@@ -1487,12 +1559,50 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,\n \t\t\tgoto ring_dequeue;\n \t\t}\n \n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\t\tlen = cache->len;\n+\t\tfor (i = 0; i < req; ++i, ++len) {\n+\t\t\tcache_objs[len] = (uint32_t) RTE_PTR_DIFF(temp_objs[i],\n+\t\t\t\t\t\t\t\tbase_value);\n+\t\t}\n+#endif\n \t\tcache->len += req;\n \t}\n \n \t/* Now fill in the response ... */\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+#if defined __ARM_NEON\n+\tuint64x2_t v_obj_table;\n+\tuint64x2_t v_cache_objs;\n+\tuint64x2_t v_base_value = vdupq_n_u64((uint64_t)base_value);\n+\n+\tfor (index = 0, len = cache->len - 1; index < (n & ~0x3); index += 4,\n+\t\t\t\t\t\tlen -= 4, obj_table += 4) {\n+\t\tv_cache_objs = vmovl_u32(vld1_u32(cache_objs + len - 1));\n+\t\tv_obj_table = vaddq_u64(v_cache_objs, v_base_value);\n+\t\tvst1q_u64((uint64_t *)obj_table, v_obj_table);\n+\t\tv_cache_objs = vmovl_u32(vld1_u32(cache_objs + len - 3));\n+\t\tv_obj_table = vaddq_u64(v_cache_objs, v_base_value);\n+\t\tvst1q_u64((uint64_t *)(obj_table + 2), v_obj_table);\n+\t}\n+\tswitch (n & 0x3) {\n+\tcase 3:\n+\t\t*(obj_table++) = (void *) RTE_PTR_ADD(base_value, cache_objs[len--]);\n+\t\t\t\t\t\t\t\t/* fallthrough */\n+\tcase 2:\n+\t\t*(obj_table++) = (void *) RTE_PTR_ADD(base_value, cache_objs[len--]);\n+\t\t\t\t\t\t\t\t/* fallthrough */\n+\tcase 1:\n+\t\t*(obj_table++) = (void *) RTE_PTR_ADD(base_value, cache_objs[len--]);\n+\t}\n+#else\n+\tfor (index = 0, len = cache->len - 1; index < n; ++index, len--, obj_table++)\n+\t\t*obj_table = (void *) RTE_PTR_ADD(base_value, cache_objs[len]);\n+#endif\n+#else\n \tfor (index = 0, len = cache->len - 1; index < n; ++index, len--, obj_table++)\n \t\t*obj_table = cache_objs[len];\n+#endif\n \n \tcache->len -= n;\n \ndiff --git a/lib/mempool/rte_mempool_ops_default.c b/lib/mempool/rte_mempool_ops_default.c\nindex 22fccf9d7619..3543cad9d4ce 100644\n--- a/lib/mempool/rte_mempool_ops_default.c\n+++ b/lib/mempool/rte_mempool_ops_default.c\n@@ -127,6 +127,13 @@ rte_mempool_op_populate_helper(struct rte_mempool *mp, unsigned int flags,\n \t\tobj = va + off;\n \t\tobj_cb(mp, obj_cb_arg, obj,\n \t\t       (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));\n+#ifdef RTE_MEMPOOL_INDEX_BASED_LCORE_CACHE\n+\t\t/* Store pool base value to calculate indices for index-based\n+\t\t * lcore cache implementation\n+\t\t */\n+\t\tif (i == 0)\n+\t\t\tmp->pool_base_value = obj;\n+#endif\n \t\trte_mempool_ops_enqueue_bulk(mp, &obj, 1);\n \t\toff += mp->elt_size + mp->trailer_size;\n \t}\n",
    "prefixes": [
        "1/1"
    ]
}