From patchwork Wed May 15 08:38:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiaolong Ye X-Patchwork-Id: 53424 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id E85385F13; Wed, 15 May 2019 10:49:48 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id C225CA3 for ; Wed, 15 May 2019 10:49:46 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 May 2019 01:49:45 -0700 X-ExtLoop1: 1 Received: from yexl-server.sh.intel.com (HELO NPG-DPDK-XDP-yexl-server.sh.intel.com) ([10.67.110.206]) by fmsmga008.fm.intel.com with ESMTP; 15 May 2019 01:49:43 -0700 From: Xiaolong Ye To: Xiaolong Ye , Qi Zhang , John McNamara , Marko Kovacevic Cc: Karlsson Magnus , Topel Bjorn , dev@dpdk.org Date: Wed, 15 May 2019 16:38:40 +0800 Message-Id: <20190515083842.15116-2-xiaolong.ye@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190515083842.15116-1-xiaolong.ye@intel.com> References: <20190515083842.15116-1-xiaolong.ye@intel.com> Subject: [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Implement zero copy of af_xdp pmd through mbuf's external memory mechanism to achieve high performance. This patch also provides a new parameter "pmd_zero_copy" for user, so they can choose to enable zero copy of af_xdp pmd or not. To be clear, "zero copy" here is different from the "zero copy mode" of AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk application. Suggested-by: Varghese Vipin Suggested-by: Olivier Matz Signed-off-by: Xiaolong Ye --- doc/guides/nics/af_xdp.rst | 1 + drivers/net/af_xdp/rte_eth_af_xdp.c | 96 +++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 20 deletions(-) diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst index 175038153..0bd4239fe 100644 --- a/doc/guides/nics/af_xdp.rst +++ b/doc/guides/nics/af_xdp.rst @@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK. * ``iface`` - name of the Kernel interface to attach to (required); * ``queue`` - netdev queue id (optional, default 0); +* ``pmd_zero_copy`` - enable zero copy or not (optional, default 0); Prerequisites ------------- diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index 35c72272c..ebef7bf34 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -70,6 +70,7 @@ struct xsk_umem_info { struct xsk_umem *umem; struct rte_ring *buf_ring; const struct rte_memzone *mz; + int pmd_zc; }; struct rx_stats { @@ -109,6 +110,7 @@ struct pmd_internals { int if_index; char if_name[IFNAMSIZ]; uint16_t queue_idx; + int pmd_zc; struct ether_addr eth_addr; struct xsk_umem_info *umem; struct rte_mempool *mb_pool_share; @@ -119,10 +121,12 @@ struct pmd_internals { #define ETH_AF_XDP_IFACE_ARG "iface" #define ETH_AF_XDP_QUEUE_IDX_ARG "queue" +#define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy" static const char * const valid_arguments[] = { ETH_AF_XDP_IFACE_ARG, ETH_AF_XDP_QUEUE_IDX_ARG, + ETH_AF_XDP_PMD_ZC_ARG, NULL }; @@ -166,6 +170,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size) return 0; } +static void +umem_buf_release_to_fq(void *addr, void *opaque) +{ + struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque; + uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64; + + rte_ring_enqueue(umem->buf_ring, (void *)umem_addr); +} + static uint16_t eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -175,6 +188,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) struct xsk_ring_prod *fq = &umem->fq; uint32_t idx_rx = 0; uint32_t free_thresh = fq->size >> 1; + int pmd_zc = umem->pmd_zc; struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE]; unsigned long dropped = 0; unsigned long rx_bytes = 0; @@ -197,19 +211,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) uint64_t addr; uint32_t len; void *pkt; + uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE; + struct rte_mbuf_ext_shared_info *shinfo; desc = xsk_ring_cons__rx_desc(rx, idx_rx++); addr = desc->addr; len = desc->len; pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr); - rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len); + if (pmd_zc) { + shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt, + &buf_len, umem_buf_release_to_fq, umem); + + rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len, + shinfo); + } else { + rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), + pkt, len); + rte_ring_enqueue(umem->buf_ring, (void *)addr); + } rte_pktmbuf_pkt_len(mbufs[i]) = len; rte_pktmbuf_data_len(mbufs[i]) = len; rx_bytes += len; bufs[i] = mbufs[i]; - - rte_ring_enqueue(umem->buf_ring, (void *)addr); } xsk_ring_cons__release(rx, rcvd); @@ -262,12 +286,21 @@ kick_tx(struct pkt_tx_queue *txq) pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE); } +static inline bool +in_umem_range(struct xsk_umem_info *umem, uint64_t addr) +{ + uint64_t mz_base_addr = umem->mz->addr_64; + + return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len; +} + static uint16_t eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { struct pkt_tx_queue *txq = queue; struct xsk_umem_info *umem = txq->pair->umem; struct rte_mbuf *mbuf; + int pmd_zc = umem->pmd_zc; void *addrs[ETH_AF_XDP_TX_BATCH_SIZE]; unsigned long tx_bytes = 0; int i; @@ -294,16 +327,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i); mbuf = bufs[i]; - - desc->addr = (uint64_t)addrs[i]; desc->len = mbuf->pkt_len; - pkt = xsk_umem__get_data(umem->mz->addr, - desc->addr); - rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), - desc->len); - tx_bytes += mbuf->pkt_len; - rte_pktmbuf_free(mbuf); + /* + * We need to make sure the external mbuf address is within + * current port's umem memzone range + */ + if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) && + in_umem_range(umem, (uint64_t)mbuf->buf_addr)) { + desc->addr = (uint64_t)mbuf->buf_addr - + umem->mz->addr_64; + mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr, + (uint64_t)addrs[i]); + } else { + desc->addr = (uint64_t)addrs[i]; + pkt = xsk_umem__get_data(umem->mz->addr, + desc->addr); + rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), + desc->len); + } + tx_bytes += mbuf->pkt_len; } xsk_ring_prod__submit(&txq->tx, nb_pkts); @@ -313,6 +356,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) txq->stats.tx_pkts += nb_pkts; txq->stats.tx_bytes += tx_bytes; + for (i = 0; i < nb_pkts; i++) + rte_pktmbuf_free(bufs[i]); + return nb_pkts; } @@ -640,6 +686,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, } internals->umem = rxq->umem; + internals->umem->pmd_zc = internals->pmd_zc; + + if (internals->umem->pmd_zc) + AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); dev->data->rx_queues[rx_queue_id] = rxq; return 0; @@ -775,9 +825,8 @@ parse_name_arg(const char *key __rte_unused, } static int -parse_parameters(struct rte_kvargs *kvlist, - char *if_name, - int *queue_idx) +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx, + int *pmd_zc) { int ret; @@ -791,6 +840,11 @@ parse_parameters(struct rte_kvargs *kvlist, if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG, + &parse_integer_arg, pmd_zc); + if (ret < 0) + goto free_kvlist; + free_kvlist: rte_kvargs_free(kvlist); return ret; @@ -827,9 +881,8 @@ get_iface_info(const char *if_name, } static struct rte_eth_dev * -init_internals(struct rte_vdev_device *dev, - const char *if_name, - int queue_idx) +init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx, + int pmd_zc) { const char *name = rte_vdev_device_name(dev); const unsigned int numa_node = dev->device.numa_node; @@ -843,6 +896,7 @@ init_internals(struct rte_vdev_device *dev, return NULL; internals->queue_idx = queue_idx; + internals->pmd_zc = pmd_zc; strlcpy(internals->if_name, if_name, IFNAMSIZ); for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) { @@ -883,6 +937,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX; struct rte_eth_dev *eth_dev = NULL; const char *name; + int pmd_zc = 0; AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", rte_vdev_device_name(dev)); @@ -909,7 +964,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (dev->device.numa_node == SOCKET_ID_ANY) dev->device.numa_node = rte_socket_id(); - if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) { + if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -919,7 +974,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) return -EINVAL; } - eth_dev = init_internals(dev, if_name, xsk_queue_idx); + eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -961,7 +1016,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = { RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "iface= " - "queue= "); + "queue= " + "pmd_zero_copy=<0|1>"); RTE_INIT(af_xdp_init_log) { From patchwork Wed May 15 08:38:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiaolong Ye X-Patchwork-Id: 53425 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 423F25F20; Wed, 15 May 2019 10:49:51 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 6E221A3 for ; Wed, 15 May 2019 10:49:48 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 May 2019 01:49:48 -0700 X-ExtLoop1: 1 Received: from yexl-server.sh.intel.com (HELO NPG-DPDK-XDP-yexl-server.sh.intel.com) ([10.67.110.206]) by fmsmga008.fm.intel.com with ESMTP; 15 May 2019 01:49:46 -0700 From: Xiaolong Ye To: Xiaolong Ye , Qi Zhang , John McNamara , Marko Kovacevic Cc: Karlsson Magnus , Topel Bjorn , dev@dpdk.org Date: Wed, 15 May 2019 16:38:41 +0800 Message-Id: <20190515083842.15116-3-xiaolong.ye@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190515083842.15116-1-xiaolong.ye@intel.com> References: <20190515083842.15116-1-xiaolong.ye@intel.com> Subject: [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds two parameters `start_queue` and `queue_count` to specify the range of netdev queues used by AF_XDP pmd. Signed-off-by: Xiaolong Ye --- doc/guides/nics/af_xdp.rst | 3 +- drivers/net/af_xdp/rte_eth_af_xdp.c | 88 +++++++++++++++-------------- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst index 0bd4239fe..18defcda3 100644 --- a/doc/guides/nics/af_xdp.rst +++ b/doc/guides/nics/af_xdp.rst @@ -27,7 +27,8 @@ Options The following options can be provided to set up an af_xdp port in DPDK. * ``iface`` - name of the Kernel interface to attach to (required); -* ``queue`` - netdev queue id (optional, default 0); +* ``start_queue`` - starting netdev queue id (optional, default 0); +* ``queue_count`` - total netdev queue number (optional, default 1); * ``pmd_zero_copy`` - enable zero copy or not (optional, default 0); Prerequisites diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index ebef7bf34..9a4510701 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -57,7 +57,8 @@ static int af_xdp_logtype; #define ETH_AF_XDP_NUM_BUFFERS 4096 #define ETH_AF_XDP_DATA_HEADROOM 0 #define ETH_AF_XDP_DFLT_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS -#define ETH_AF_XDP_DFLT_QUEUE_IDX 0 +#define ETH_AF_XDP_DFLT_START_QUEUE_IDX 0 +#define ETH_AF_XDP_DFLT_QUEUE_COUNT 1 #define ETH_AF_XDP_RX_BATCH_SIZE 32 #define ETH_AF_XDP_TX_BATCH_SIZE 32 @@ -88,7 +89,7 @@ struct pkt_rx_queue { struct rx_stats stats; struct pkt_tx_queue *pair; - uint16_t queue_idx; + int xsk_queue_idx; }; struct tx_stats { @@ -103,13 +104,15 @@ struct pkt_tx_queue { struct tx_stats stats; struct pkt_rx_queue *pair; - uint16_t queue_idx; + int xsk_queue_idx; }; struct pmd_internals { int if_index; char if_name[IFNAMSIZ]; - uint16_t queue_idx; + int start_queue_idx; + int queue_cnt; + int pmd_zc; struct ether_addr eth_addr; struct xsk_umem_info *umem; @@ -120,12 +123,14 @@ struct pmd_internals { }; #define ETH_AF_XDP_IFACE_ARG "iface" -#define ETH_AF_XDP_QUEUE_IDX_ARG "queue" +#define ETH_AF_XDP_START_QUEUE_ARG "start_queue" +#define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count" #define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy" static const char * const valid_arguments[] = { ETH_AF_XDP_IFACE_ARG, - ETH_AF_XDP_QUEUE_IDX_ARG, + ETH_AF_XDP_START_QUEUE_ARG, + ETH_AF_XDP_QUEUE_COUNT_ARG, ETH_AF_XDP_PMD_ZC_ARG, NULL }; @@ -395,8 +400,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->if_index = internals->if_index; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = ETH_FRAME_LEN; - dev_info->max_rx_queues = 1; - dev_info->max_tx_queues = 1; + dev_info->max_rx_queues = internals->queue_cnt; + dev_info->max_tx_queues = internals->queue_cnt; dev_info->min_mtu = ETHER_MIN_MTU; dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM; @@ -528,7 +533,8 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused, } static struct -xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals) +xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + struct pkt_rx_queue *rxq) { struct xsk_umem_info *umem; const struct rte_memzone *mz; @@ -549,7 +555,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals) } snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u", - internals->if_name, internals->queue_idx); + internals->if_name, rxq->xsk_queue_idx); umem->buf_ring = rte_ring_create(ring_name, ETH_AF_XDP_NUM_BUFFERS, rte_socket_id(), @@ -565,7 +571,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals) ETH_AF_XDP_DATA_HEADROOM)); snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u", - internals->if_name, internals->queue_idx); + internals->if_name, rxq->xsk_queue_idx); mz = rte_memzone_reserve_aligned(mz_name, ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE, rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG, @@ -602,7 +608,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, int ret = 0; int reserve_size; - rxq->umem = xdp_umem_configure(internals); + rxq->umem = xdp_umem_configure(internals, rxq); if (rxq->umem == NULL) return -ENOMEM; @@ -612,7 +618,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; cfg.bind_flags = 0; ret = xsk_socket__create(&rxq->xsk, internals->if_name, - internals->queue_idx, rxq->umem->umem, &rxq->rx, + rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx, &txq->tx, &cfg); if (ret) { AF_XDP_LOG(ERR, "Failed to create xsk socket.\n"); @@ -635,20 +641,6 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, return ret; } -static void -queue_reset(struct pmd_internals *internals, uint16_t queue_idx) -{ - struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx]; - struct pkt_tx_queue *txq = rxq->pair; - - memset(rxq, 0, sizeof(*rxq)); - memset(txq, 0, sizeof(*txq)); - rxq->pair = txq; - txq->pair = rxq; - rxq->queue_idx = queue_idx; - txq->queue_idx = queue_idx; -} - static int eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, @@ -663,8 +655,9 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, int ret; rxq = &internals->rx_queues[rx_queue_id]; - queue_reset(internals, rx_queue_id); + AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n", + rx_queue_id, rxq->xsk_queue_idx); /* Now get the space available for data in the mbuf */ buf_size = rte_pktmbuf_data_room_size(mb_pool) - RTE_PKTMBUF_HEADROOM; @@ -695,7 +688,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, return 0; err: - queue_reset(internals, rx_queue_id); return ret; } @@ -825,8 +817,8 @@ parse_name_arg(const char *key __rte_unused, } static int -parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx, - int *pmd_zc) +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, + int *queue_cnt, int *pmd_zc) { int ret; @@ -835,11 +827,18 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx, if (ret < 0) goto free_kvlist; - ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IDX_ARG, - &parse_integer_arg, queue_idx); + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG, + &parse_integer_arg, start_queue); if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG, + &parse_integer_arg, queue_cnt); + if (ret < 0 || *queue_cnt > ETH_AF_XDP_MAX_QUEUE_PAIRS) { + ret = -EINVAL; + goto free_kvlist; + } + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG, &parse_integer_arg, pmd_zc); if (ret < 0) @@ -881,8 +880,8 @@ get_iface_info(const char *if_name, } static struct rte_eth_dev * -init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx, - int pmd_zc) +init_internals(struct rte_vdev_device *dev, const char *if_name, + int start_queue_idx, int queue_cnt, int pmd_zc) { const char *name = rte_vdev_device_name(dev); const unsigned int numa_node = dev->device.numa_node; @@ -895,13 +894,16 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx, if (internals == NULL) return NULL; - internals->queue_idx = queue_idx; + internals->start_queue_idx = start_queue_idx; + internals->queue_cnt = queue_cnt; internals->pmd_zc = pmd_zc; strlcpy(internals->if_name, if_name, IFNAMSIZ); - for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) { + for (i = 0; i < queue_cnt; i++) { internals->tx_queues[i].pair = &internals->rx_queues[i]; internals->rx_queues[i].pair = &internals->tx_queues[i]; + internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i; + internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i; } ret = get_iface_info(if_name, &internals->eth_addr, @@ -934,7 +936,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) { struct rte_kvargs *kvlist; char if_name[IFNAMSIZ] = {'\0'}; - int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX; + int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX; + int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT; struct rte_eth_dev *eth_dev = NULL; const char *name; int pmd_zc = 0; @@ -964,7 +967,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) if (dev->device.numa_node == SOCKET_ID_ANY) dev->device.numa_node = rte_socket_id(); - if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) { + if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, + &xsk_queue_cnt, &pmd_zc) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -974,7 +978,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) return -EINVAL; } - eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc); + eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, + xsk_queue_cnt, pmd_zc); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -1016,7 +1021,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = { RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv); RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "iface= " - "queue= " + "start_queue= " + "queue_count= " "pmd_zero_copy=<0|1>"); RTE_INIT(af_xdp_init_log) From patchwork Wed May 15 08:38:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiaolong Ye X-Patchwork-Id: 53426 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id AD9435F2B; Wed, 15 May 2019 10:49:53 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 02B685F1B for ; Wed, 15 May 2019 10:49:50 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 May 2019 01:49:50 -0700 X-ExtLoop1: 1 Received: from yexl-server.sh.intel.com (HELO NPG-DPDK-XDP-yexl-server.sh.intel.com) ([10.67.110.206]) by fmsmga008.fm.intel.com with ESMTP; 15 May 2019 01:49:49 -0700 From: Xiaolong Ye To: Xiaolong Ye , Qi Zhang , John McNamara , Marko Kovacevic Cc: Karlsson Magnus , Topel Bjorn , dev@dpdk.org Date: Wed, 15 May 2019 16:38:42 +0800 Message-Id: <20190515083842.15116-4-xiaolong.ye@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190515083842.15116-1-xiaolong.ye@intel.com> References: <20190515083842.15116-1-xiaolong.ye@intel.com> Subject: [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch enables busy-poll support for AF_XDP pmd. With busy-poll, the kernel driver is executed in process context by calling the poll() syscall. The main advantage of busy-poll feature is that all processing occurs on a single core. This eliminates the core-to-core cache transfers that occur between the application and the softirqd processing on another core. The drawback of busy-poll is that it will downgrade the max throughput due to syscall, but from a per-core perspective, the performance is better as normal mode runs on two cores and busy-poll only runs on a single core. Signed-off-by: Xiaolong Ye --- doc/guides/nics/af_xdp.rst | 1 + drivers/net/af_xdp/rte_eth_af_xdp.c | 48 ++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst index 18defcda3..e42065170 100644 --- a/doc/guides/nics/af_xdp.rst +++ b/doc/guides/nics/af_xdp.rst @@ -29,6 +29,7 @@ The following options can be provided to set up an af_xdp port in DPDK. * ``iface`` - name of the Kernel interface to attach to (required); * ``start_queue`` - starting netdev queue id (optional, default 0); * ``queue_count`` - total netdev queue number (optional, default 1); +* ``busy_poll_size`` - busy poll batch size (optional, default 0); * ``pmd_zero_copy`` - enable zero copy or not (optional, default 0); Prerequisites diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index 9a4510701..1e46a4ef4 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ struct xsk_umem_info { struct rte_ring *buf_ring; const struct rte_memzone *mz; int pmd_zc; + int busy_poll; }; struct rx_stats { @@ -114,6 +116,7 @@ struct pmd_internals { int queue_cnt; int pmd_zc; + int busy_poll_size; struct ether_addr eth_addr; struct xsk_umem_info *umem; struct rte_mempool *mb_pool_share; @@ -126,12 +129,14 @@ struct pmd_internals { #define ETH_AF_XDP_START_QUEUE_ARG "start_queue" #define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count" #define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy" +#define ETH_AF_XDP_BUSY_POLL_SIZE_ARG "busy_poll_size" static const char * const valid_arguments[] = { ETH_AF_XDP_IFACE_ARG, ETH_AF_XDP_START_QUEUE_ARG, ETH_AF_XDP_QUEUE_COUNT_ARG, ETH_AF_XDP_PMD_ZC_ARG, + ETH_AF_XDP_BUSY_POLL_SIZE_ARG, NULL }; @@ -191,6 +196,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) struct xsk_ring_cons *rx = &rxq->rx; struct xsk_umem_info *umem = rxq->umem; struct xsk_ring_prod *fq = &umem->fq; + struct pollfd pfds[1]; uint32_t idx_rx = 0; uint32_t free_thresh = fq->size >> 1; int pmd_zc = umem->pmd_zc; @@ -199,6 +205,15 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) unsigned long rx_bytes = 0; int rcvd, i; + if (umem->busy_poll) { + memset(pfds, 0, sizeof(pfds)); + pfds[0].fd = xsk_socket__fd(rxq->xsk); + pfds[0].events = POLLIN; + + if (poll(pfds, 1, 0) <= 0) + return 0; + } + nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE); if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0)) @@ -305,12 +320,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) struct pkt_tx_queue *txq = queue; struct xsk_umem_info *umem = txq->pair->umem; struct rte_mbuf *mbuf; + struct pollfd pfds[1]; int pmd_zc = umem->pmd_zc; void *addrs[ETH_AF_XDP_TX_BATCH_SIZE]; unsigned long tx_bytes = 0; int i; uint32_t idx_tx; + if (umem->busy_poll) { + memset(pfds, 0, sizeof(pfds)); + pfds[0].fd = xsk_socket__fd(txq->pair->xsk); + pfds[0].events = POLLOUT; + if (poll(pfds, 1, 0) <= 0) + return 0; + if (!(pfds[0].revents & POLLOUT)) + return 0; + } + nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE); pull_umem_cq(umem, nb_pkts); @@ -615,6 +641,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, cfg.rx_size = ring_size; cfg.tx_size = ring_size; cfg.libbpf_flags = 0; + cfg.busy_poll = internals->busy_poll_size; cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; cfg.bind_flags = 0; ret = xsk_socket__create(&rxq->xsk, internals->if_name, @@ -680,10 +707,14 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, internals->umem = rxq->umem; internals->umem->pmd_zc = internals->pmd_zc; + internals->umem->busy_poll = internals->busy_poll_size ? 1 : 0; if (internals->umem->pmd_zc) AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n"); + if (internals->umem->busy_poll) + AF_XDP_LOG(INFO, "Busy poll enabled.\n"); + dev->data->rx_queues[rx_queue_id] = rxq; return 0; @@ -818,7 +849,7 @@ parse_name_arg(const char *key __rte_unused, static int parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, - int *queue_cnt, int *pmd_zc) + int *queue_cnt, int *pmd_zc, int *busy_poll_size) { int ret; @@ -844,6 +875,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue, if (ret < 0) goto free_kvlist; + ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUSY_POLL_SIZE_ARG, + &parse_integer_arg, busy_poll_size); + if (ret < 0) + goto free_kvlist; + free_kvlist: rte_kvargs_free(kvlist); return ret; @@ -881,7 +917,8 @@ get_iface_info(const char *if_name, static struct rte_eth_dev * init_internals(struct rte_vdev_device *dev, const char *if_name, - int start_queue_idx, int queue_cnt, int pmd_zc) + int start_queue_idx, int queue_cnt, int pmd_zc, + int busy_poll_size) { const char *name = rte_vdev_device_name(dev); const unsigned int numa_node = dev->device.numa_node; @@ -897,6 +934,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, internals->start_queue_idx = start_queue_idx; internals->queue_cnt = queue_cnt; internals->pmd_zc = pmd_zc; + internals->busy_poll_size = busy_poll_size; strlcpy(internals->if_name, if_name, IFNAMSIZ); for (i = 0; i < queue_cnt; i++) { @@ -941,6 +979,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) struct rte_eth_dev *eth_dev = NULL; const char *name; int pmd_zc = 0; + int busy_poll_size = 0; AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n", rte_vdev_device_name(dev)); @@ -968,7 +1007,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) dev->device.numa_node = rte_socket_id(); if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx, - &xsk_queue_cnt, &pmd_zc) < 0) { + &xsk_queue_cnt, &pmd_zc, &busy_poll_size) < 0) { AF_XDP_LOG(ERR, "Invalid kvargs value\n"); return -EINVAL; } @@ -979,7 +1018,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) } eth_dev = init_internals(dev, if_name, xsk_start_queue_idx, - xsk_queue_cnt, pmd_zc); + xsk_queue_cnt, pmd_zc, busy_poll_size); if (eth_dev == NULL) { AF_XDP_LOG(ERR, "Failed to init internals\n"); return -1; @@ -1023,6 +1062,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp, "iface= " "start_queue= " "queue_count= " + "busy_poll_size= " "pmd_zero_copy=<0|1>"); RTE_INIT(af_xdp_init_log)