get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/64758/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 64758,
    "url": "http://patchwork.dpdk.org/api/patches/64758/?format=api",
    "web_url": "http://patchwork.dpdk.org/project/dpdk/patch/20200116092706.17388-2-huwei013@chinasoftinc.com/",
    "project": {
        "id": 1,
        "url": "http://patchwork.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20200116092706.17388-2-huwei013@chinasoftinc.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20200116092706.17388-2-huwei013@chinasoftinc.com",
    "date": "2020-01-16T09:27:03",
    "name": "[1/4] net/hns3: replace memory barrier with data dependency order",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "d5fca1b82d6438c7d486f9b01762ec9fd26485cd",
    "submitter": {
        "id": 1537,
        "url": "http://patchwork.dpdk.org/api/people/1537/?format=api",
        "name": "Wei Hu (Xavier)",
        "email": "huwei013@chinasoftinc.com"
    },
    "delegate": {
        "id": 319,
        "url": "http://patchwork.dpdk.org/api/users/319/?format=api",
        "username": "fyigit",
        "first_name": "Ferruh",
        "last_name": "Yigit",
        "email": "ferruh.yigit@amd.com"
    },
    "mbox": "http://patchwork.dpdk.org/project/dpdk/patch/20200116092706.17388-2-huwei013@chinasoftinc.com/mbox/",
    "series": [
        {
            "id": 8153,
            "url": "http://patchwork.dpdk.org/api/series/8153/?format=api",
            "web_url": "http://patchwork.dpdk.org/project/dpdk/list/?series=8153",
            "date": "2020-01-16T09:27:02",
            "name": "improvement and cleanup for hns3 PMD driver",
            "version": 1,
            "mbox": "http://patchwork.dpdk.org/series/8153/mbox/"
        }
    ],
    "comments": "http://patchwork.dpdk.org/api/patches/64758/comments/",
    "check": "success",
    "checks": "http://patchwork.dpdk.org/api/patches/64758/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 35F26A0352;\n\tThu, 16 Jan 2020 10:27:28 +0100 (CET)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id C34461C1DC;\n\tThu, 16 Jan 2020 10:27:20 +0100 (CET)",
            "from incedge.chinasoftinc.com (unknown [114.113.233.8])\n by dpdk.org (Postfix) with ESMTP id 184E51C1D5\n for <dev@dpdk.org>; Thu, 16 Jan 2020 10:27:16 +0100 (CET)",
            "from mail.chinasoftinc.com (inccas001.ito.icss [10.168.0.51]) by\n incedge.chinasoftinc.com with ESMTP id D2CLcCkIHoNXe8KH (version=TLSv1\n cipher=ECDHE-RSA-AES256-SHA bits=256 verify=NO) for <dev@dpdk.org>;\n Thu, 16 Jan 2020 17:27:14 +0800 (CST)",
            "from localhost.localdomain (203.160.91.226) by INCCAS001.ito.icss\n (10.168.0.60) with Microsoft SMTP Server id 14.3.439.0; Thu, 16 Jan 2020\n 17:27:14 +0800"
        ],
        "X-ASG-Debug-ID": "1579166833-0a3dd17c8806fb0003-TfluYd",
        "X-Barracuda-Envelope-From": "huwei013@chinasoftinc.com",
        "X-Barracuda-RBL-Trusted-Forwarder": [
            "10.168.0.51",
            "10.168.0.60"
        ],
        "X-ASG-Whitelist": "Client",
        "From": "\"Wei Hu (Xavier)\" <huwei013@chinasoftinc.com>",
        "To": "<dev@dpdk.org>",
        "Date": "Thu, 16 Jan 2020 17:27:03 +0800",
        "X-ASG-Orig-Subj": "[PATCH 1/4] net/hns3: replace memory barrier with data\n dependency order",
        "Message-ID": "<20200116092706.17388-2-huwei013@chinasoftinc.com>",
        "X-Mailer": "git-send-email 2.23.0",
        "In-Reply-To": "<20200116092706.17388-1-huwei013@chinasoftinc.com>",
        "References": "<20200116092706.17388-1-huwei013@chinasoftinc.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain",
        "X-Originating-IP": "[203.160.91.226]",
        "X-Barracuda-Connect": "inccas001.ito.icss[10.168.0.51]",
        "X-Barracuda-Start-Time": "1579166834",
        "X-Barracuda-Encrypted": "ECDHE-RSA-AES256-SHA",
        "X-Barracuda-URL": "https://spam.chinasoftinc.com:443/cgi-mod/mark.cgi",
        "X-Virus-Scanned": "by bsmtpd at chinasoftinc.com",
        "X-Barracuda-Scan-Msg-Size": "6866",
        "Subject": "[dpdk-dev] [PATCH 1/4] net/hns3: replace memory barrier with data\n\tdependency order",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: Chengwen Feng <fengchengwen@huawei.com>\n\nThis patch optimizes the Rx performance by using data dependency ordering\nto instead of memory barrier which is rte_cio_rmb in the '.rx_pkt_burst'\nops implementation function named hns3_recv_pkts.\n\nSigned-off-by: Chengwen Feng <fengchengwen@huawei.com>\nSigned-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>\n---\n drivers/net/hns3/hns3_rxtx.c | 85 +++++++++++++++++++++++++++++++-----\n 1 file changed, 73 insertions(+), 12 deletions(-)",
    "diff": "diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c\nindex 6f74a7917..9d8d0b7e1 100644\n--- a/drivers/net/hns3/hns3_rxtx.c\n+++ b/drivers/net/hns3/hns3_rxtx.c\n@@ -1402,13 +1402,14 @@ hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, uint64_t packet_type,\n uint16_t\n hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n {\n+\tvolatile struct hns3_desc *rx_ring;  /* RX ring (desc) */\n+\tvolatile struct hns3_desc *rxdp;     /* pointer of the current desc */\n \tstruct hns3_rx_queue *rxq;      /* RX queue */\n-\tstruct hns3_desc *rx_ring;      /* RX ring (desc) */\n \tstruct hns3_entry *sw_ring;\n \tstruct hns3_entry *rxe;\n-\tstruct hns3_desc *rxdp;         /* pointer of the current desc */\n \tstruct rte_mbuf *first_seg;\n \tstruct rte_mbuf *last_seg;\n+\tstruct hns3_desc rxd;\n \tstruct rte_mbuf *nmb;           /* pointer of the new mbuf */\n \tstruct rte_mbuf *rxm;\n \tstruct rte_eth_dev *dev;\n@@ -1440,6 +1441,67 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n \t\tbd_base_info = rte_le_to_cpu_32(rxdp->rx.bd_base_info);\n \t\tif (unlikely(!hns3_get_bit(bd_base_info, HNS3_RXD_VLD_B)))\n \t\t\tbreak;\n+\t\t/*\n+\t\t * The interactive process between software and hardware of\n+\t\t * receiving a new packet in hns3 network engine:\n+\t\t * 1. Hardware network engine firstly writes the packet content\n+\t\t *    to the memory pointed by the 'addr' field of the Rx Buffer\n+\t\t *    Descriptor, secondly fills the result of parsing the\n+\t\t *    packet include the valid field into the Rx Buffer\n+\t\t *    Descriptor in one write operation.\n+\t\t * 2. Driver reads the Rx BD's valid field in the loop to check\n+\t\t *    whether it's valid, if valid then assign a new address to\n+\t\t *    the addr field, clear the valid field, get the other\n+\t\t *    information of the packet by parsing Rx BD's other fields,\n+\t\t *    finally write back the number of Rx BDs processed by the\n+\t\t *    driver to the HNS3_RING_RX_HEAD_REG register to inform\n+\t\t *    hardware.\n+\t\t * In the above process, the ordering is very important. We must\n+\t\t * make sure that CPU read Rx BD's other fields only after the\n+\t\t * Rx BD is valid.\n+\t\t *\n+\t\t * There are two type of re-ordering: compiler re-ordering and\n+\t\t * CPU re-ordering under the ARMv8 architecture.\n+\t\t * 1. we use volatile to deal with compiler re-ordering, so you\n+\t\t *    can see that rx_ring/rxdp defined with volatile.\n+\t\t * 2. we commonly use memory barrier to deal with CPU\n+\t\t *    re-ordering, but the cost is high.\n+\t\t *\n+\t\t * In order to solve the high cost of using memory barrier, we\n+\t\t * use the data dependency order under the ARMv8 architecture,\n+\t\t * for exmple:\n+\t\t *      instr01: load A\n+\t\t *      instr02: load B <- A\n+\t\t * the instr02 will always execute after instr01.\n+\t\t *\n+\t\t * To construct the data dependency ordering, we use the\n+\t\t * following assignment:\n+\t\t *      rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) -\n+\t\t *                 (1u<<HNS3_RXD_VLD_B)]\n+\t\t * Using gcc compiler under the ARMv8 architecture, the related\n+\t\t * assembly code example as follows:\n+\t\t * note: (1u << HNS3_RXD_VLD_B) equal 0x10\n+\t\t *      instr01: ldr w26, [x22, #28]  --read bd_base_info\n+\t\t *      instr02: and w0, w26, #0x10   --calc bd_base_info & 0x10\n+\t\t *      instr03: sub w0, w0, #0x10    --calc (bd_base_info &\n+\t\t *                                            0x10) - 0x10\n+\t\t *      instr04: add x0, x22, x0, lsl #5 --calc copy source addr\n+\t\t *      instr05: ldp x2, x3, [x0]\n+\t\t *      instr06: stp x2, x3, [x29, #256] --copy BD's [0 ~ 15]B\n+\t\t *      instr07: ldp x4, x5, [x0, #16]\n+\t\t *      instr08: stp x4, x5, [x29, #272] --copy BD's [16 ~ 31]B\n+\t\t * the instr05~08 depend on x0's value, x0 depent on w26's\n+\t\t * value, the w26 is the bd_base_info, this form the data\n+\t\t * dependency ordering.\n+\t\t * note: if BD is valid, (bd_base_info & (1u<<HNS3_RXD_VLD_B)) -\n+\t\t *       (1u<<HNS3_RXD_VLD_B) will always zero, so the\n+\t\t *       assignment is correct.\n+\t\t *\n+\t\t * So we use the data dependency ordering instead of memory\n+\t\t * barrier to improve receive performance.\n+\t\t */\n+\t\trxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) -\n+\t\t\t   (1u << HNS3_RXD_VLD_B)];\n \n \t\tnmb = rte_mbuf_raw_alloc(rxq->mb_pool);\n \t\tif (unlikely(nmb == NULL)) {\n@@ -1463,14 +1525,13 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n \t\trxe->mbuf = nmb;\n \n \t\tdma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));\n-\t\trxdp->addr = dma_addr;\n \t\trxdp->rx.bd_base_info = 0;\n+\t\trxdp->addr = dma_addr;\n \n-\t\trte_cio_rmb();\n \t\t/* Load remained descriptor data and extract necessary fields */\n-\t\tdata_len = (uint16_t)(rte_le_to_cpu_16(rxdp->rx.size));\n-\t\tl234_info = rte_le_to_cpu_32(rxdp->rx.l234_info);\n-\t\tol_info = rte_le_to_cpu_32(rxdp->rx.ol_info);\n+\t\tdata_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.size));\n+\t\tl234_info = rte_le_to_cpu_32(rxd.rx.l234_info);\n+\t\tol_info = rte_le_to_cpu_32(rxd.rx.ol_info);\n \n \t\tif (first_seg == NULL) {\n \t\t\tfirst_seg = rxm;\n@@ -1489,14 +1550,14 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n \t\t}\n \n \t\t/* The last buffer of the received packet */\n-\t\tpkt_len = (uint16_t)(rte_le_to_cpu_16(rxdp->rx.pkt_len));\n+\t\tpkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len));\n \t\tfirst_seg->pkt_len = pkt_len;\n \t\tfirst_seg->port = rxq->port_id;\n-\t\tfirst_seg->hash.rss = rte_le_to_cpu_32(rxdp->rx.rss_hash);\n+\t\tfirst_seg->hash.rss = rte_le_to_cpu_32(rxd.rx.rss_hash);\n \t\tfirst_seg->ol_flags |= PKT_RX_RSS_HASH;\n \t\tif (unlikely(hns3_get_bit(bd_base_info, HNS3_RXD_LUM_B))) {\n \t\t\tfirst_seg->hash.fdir.hi =\n-\t\t\t\trte_le_to_cpu_32(rxdp->rx.fd_id);\n+\t\t\t\trte_le_to_cpu_32(rxd.rx.fd_id);\n \t\t\tfirst_seg->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;\n \t\t}\n \t\trxm->next = NULL;\n@@ -1513,9 +1574,9 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n \t\t\thns3_rx_set_cksum_flag(rxm, first_seg->packet_type,\n \t\t\t\t\t       cksum_err);\n \n-\t\tfirst_seg->vlan_tci = rte_le_to_cpu_16(rxdp->rx.vlan_tag);\n+\t\tfirst_seg->vlan_tci = rte_le_to_cpu_16(rxd.rx.vlan_tag);\n \t\tfirst_seg->vlan_tci_outer =\n-\t\t\trte_le_to_cpu_16(rxdp->rx.ot_vlan_tag);\n+\t\t\trte_le_to_cpu_16(rxd.rx.ot_vlan_tag);\n \t\trx_pkts[nb_rx++] = first_seg;\n \t\tfirst_seg = NULL;\n \t\tcontinue;\n",
    "prefixes": [
        "1/4"
    ]
}