From patchwork Thu May 9 01:57:48 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiaolong Ye X-Patchwork-Id: 53340 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 1DEC034F0; Thu, 9 May 2019 04:05:15 +0200 (CEST) Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 698192C2B for ; Thu, 9 May 2019 04:05:13 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 08 May 2019 19:05:12 -0700 X-ExtLoop1: 1 Received: from yexl-server.sh.intel.com (HELO NPG-DPDK-XDP-yexl-server.sh.intel.com) ([10.67.110.206]) by fmsmga005.fm.intel.com with ESMTP; 08 May 2019 19:05:10 -0700 From: Xiaolong Ye To: Xiaolong Ye , Qi Zhang , John McNamara , Marko Kovacevic Cc: dev@dpdk.org Date: Thu, 9 May 2019 09:57:48 +0800 Message-Id: <20190509015748.110374-1-xiaolong.ye@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [dpdk-dev] [PATCH v1] net/af_xdp: enable zero copy by extbuf X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Implement zero copy of af_xdp pmd through mbuf's external memory mechanism to achieve high performance. This patch also provides a new parameter "pmd_zero_copy" for user, so they can choose to enable zero copy of af_xdp pmd or not. To be clear, "zero copy" here is different from the "zero copy mode" of AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk application. Suggested-by: Olivier Matz Signed-off-by: Xiaolong Ye --- This patch is targeted for 19.08. doc/guides/nics/af_xdp.rst | 1 + drivers/net/af_xdp/rte_eth_af_xdp.c | 96 +++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 20 deletions(-) diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst index 175038153..0bd4239fe 100644 --- a/doc/guides/nics/af_xdp.rst +++ b/doc/guides/nics/af_xdp.rst @@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK. * ``iface`` - name of the Kernel interface to attach to (required); * ``queue`` - netdev queue id (optional, default 0); +* ``pmd_zero_copy`` - enable zero copy or not (optional, default 0); Prerequisites ------------- diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index 35c72272c..ebef7bf34 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -70,6 +70,7 @@ struct xsk_umem_info { struct xsk_umem *umem; struct rte_ring *buf_ring; const struct rte_memzone *mz; + int pmd_zc; }; struct rx_stats { @@ -109,6 +110,7 @@ struct pmd_internals { int if_index; char if_name[IFNAMSIZ]; uint16_t queue_idx; + int pmd_zc; struct ether_addr eth_addr; struct xsk_umem_info *umem; struct rte_mempool *mb_pool_share; @@ -119,10 +121,12 @@ struct pmd_internals { #define ETH_AF_XDP_IFACE_ARG "iface" #define ETH_AF_XDP_QUEUE_IDX_ARG "queue" +#define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy" static const char * const valid_arguments[] = { ETH_AF_XDP_IFACE_ARG, ETH_AF_XDP_QUEUE_IDX_ARG, + ETH_AF_XDP_PMD_ZC_ARG, NULL }; @@ -166,6 +170,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size) return 0; } +static void +umem_buf_release_to_fq(void *addr, void *opaque) +{ + struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque; + uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64; + + rte_ring_enqueue(umem->buf_ring, (void *)umem_addr); +} + static uint16_t eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -175,6 +188,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) struct xsk_ring_prod *fq = &umem->fq; uint32_t idx_rx = 0; uint32_t free_thresh = fq->size >> 1; + int pmd_zc = umem->pmd_zc; struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; unsigned long dropped = 0; unsigned long rx_bytes = 0; @@ -197,19 +211,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) uint64_t addr; uint32_t len; void *pkt; + uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE; + struct rte_mbuf_ext_shared_info *shinfo; desc = xsk_ring_cons__rx_desc(rx, idx_rx++); addr = desc->addr; len = desc->len; pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr); - rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len); + if (pmd_zc) { + shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt, + &buf_len, umem_buf_release_to_fq, umem); + + rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len, + shinfo); + } else { + rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), + pkt, len); + rte_ring_enqueue(umem->buf_ring, (void *)addr); + } rte_pktmbuf_pkt_len(mbufs[i]) = len; rte_pktmbuf_data_len(mbufs[i]) = len; rx_bytes += len; bufs[i] = mbufs[i]; - - rte_ring_enqueue(umem->buf_ring, (void *)addr); } xsk_ring_cons__release(rx, rcvd); @@ -262,12 +286,21 @@ kick_tx(struct pkt_tx_queue *txq) pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); } +static inline bool +in_umem_range(struct xsk_umem_info *umem, uint64_t addr) +{ + uint64_t mz_base_addr = umem->mz->addr_64; + + return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len; +} + static uint16_t eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { struct pkt_tx_queue *txq = queue; struct xsk_umem_info *umem = txq->pair->umem; struct rte_mbuf *mbuf; + int pmd_zc = umem->pmd_zc; void *addrs[ETH_AF_XDP_TX_BATCH_SIZE]; unsigned long tx_bytes = 0; int i; @@ -294,16 +327,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i); mbuf = bufs[i]; - - desc->addr = (uint64_t)addrs[i]; desc->len = mbuf->pkt_len; - pkt = xsk_umem__get_data(umem->mz->addr, - desc->addr); - rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), - desc->len); - tx_bytes += mbuf->pkt_len; - rte_pktmbuf_free(mbuf); + /* + * We need to make sure the external mbuf address is within + * current port's umem memzone range + */ + if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) && + in_umem_range(umem, (uint64_t)mbuf->buf_addr)) { + desc->addr = (uint64_t)mbuf->buf_addr - + umem->mz->addr_64; + mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr, + (uint64_t)addrs[i]); + } else { + desc->addr = (uint64_t)addrs[i]; + pkt = xsk_umem__get_data(umem->mz->addr, + desc->addr); + rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), + desc->len); + } + tx_bytes += mbuf->pkt_len; } xsk_ring_prod__submit(&txq->tx, nb_pkts); @@ -313,6 +356,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) txq->stats.tx_pkts += nb_pkts; txq->stats.tx_bytes += tx_bytes; + for (i = 0; i < nb_pkts; i++) + rte_pktmbuf_free(bufs[i]); + return nb_pkts; } @@ -640,6 +686,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, } internals->umem = rxq->umem; + internals->umem->pmd_zc = internals->pmd_zc; + + if (internals->umem->pmd_zc) + AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); dev->data->rx_queues[rx_queue_id] = rxq; return 0; @@ -775,9 +825,8 @@ parse_name_arg(const char *key __rte_unused, } static int -parse_parameters(struct rte_kvargs *kvlist, - char *if_name, - int *queue_idx) +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx, + int *pmd_zc) { int ret; @@ -791,6 +840,11 @@ parse_parameters(struct rte_kvargs *kvlist, if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG, + &parse_integer_arg, pmd_zc); + if (ret < 0) + goto free_kvlist; + free_kvlist: rte_kvargs_free(kvlist); return ret; @@ -827,9 +881,8 @@ get_iface_info(const char *if_name, } static struct rte_eth_dev * -init_internals(struct rte_vdev_device *dev, - const char *if_name, - int queue_idx) +init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx, + int pmd_zc) { const char *name = rte_vdev_device_name(dev); const unsigned int numa_node = dev->device.numa_node; @@ -843,6 +896,7 @@ init_internals(struct rte_vdev_device *dev, return NULL; internals->queue_idx = queue_idx; + internals->pmd_zc = pmd_zc; strlcpy(internals->if_name, if_name, IFNAMSIZ); for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) { @@ -883,6 +937,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX; struct rte_eth_dev *eth_dev = NULL; const char *name; + int pmd_zc = 0; AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", rte_vdev_device_name(dev)); @@ -909,7 +964,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (dev->device.numa_node == SOCKET_ID_ANY) dev->device.numa_node = rte_socket_id(); - if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) { + if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -919,7 +974,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) return -EINVAL; } - eth_dev = init_internals(dev, if_name, xsk_queue_idx); + eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -961,7 +1016,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = { RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "iface= " - "queue= "); + "queue= " + "pmd_zero_copy=<0|1>"); RTE_INIT(af_xdp_init_log) {