From patchwork Thu Jan 25 15:00:24 2018
X-Patchwork-Submitter: Xueming Li
X-Patchwork-Id: 34477
X-Patchwork-Delegate: ferruh.yigit@amd.com
From: Xueming Li
To: Nelio Laranjeiro
Cc: Xueming Li, Shahaf Shuler, dev@dpdk.org
Date: Thu, 25 Jan 2018 23:00:24 +0800
Message-Id: <1516892424-160194-1-git-send-email-xuemingl@mellanox.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <20180119150854.89828-1-xuemingl@mellanox.com>
References: <20180119150854.89828-1-xuemingl@mellanox.com>
Subject: [dpdk-dev] [PATCH v2] net/mlx5: mmap uar address around huge pages
List-Id: DPDK patches and discussions

Reserving memory space for the UAR near the huge pages helps to reduce
the cases where the secondary process cannot start. As those pages are
physical pages, they must be mapped at the same virtual address as in
the primary process for the secondary process to work. Since this
remapping is almost the last one performed by the process (libraries,
heaps and stacks are already loaded), there is, as with huge pages, no
guarantee that this mechanism will always work.

Signed-off-by: Xueming Li
Acked-by: Nelio Laranjeiro
---
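Note: the reservation itself is nothing more than an anonymous PROT_NONE
mmap(), which pins a range of virtual address space without consuming
physical memory; real UAR pages are later mapped into that range with
MAP_FIXED. A minimal standalone sketch of the idea (64-bit Linux assumed;
the size and the hint address are illustrative, not the driver's values):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t size = (size_t)1 << 32; /* Same order as MLX5_UAR_SIZE. */
        void *hint = (void *)((uintptr_t)1 << 36); /* Illustrative hint. */
        /* PROT_NONE + MAP_ANONYMOUS: address space only, no memory. */
        void *addr = mmap(hint, size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (addr == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        /* Without MAP_FIXED the kernel may ignore the hint, so the
         * caller has to check the returned address. */
        printf("reserved %zu bytes at %p\n", size, addr);
        munmap(addr, size);
        return 0;
    }

The patch derives its hint from the lowest hugepage segment minus
MLX5_UAR_OFFSET + MLX5_UAR_SIZE and records whatever address it got, so
every port and the secondary process reuse the same window.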
 drivers/net/mlx5/mlx5.c         | 114 ++++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5.h         |   1 +
 drivers/net/mlx5/mlx5_defs.h    |  10 ++++
 drivers/net/mlx5/mlx5_rxtx.h    |   3 +-
 drivers/net/mlx5/mlx5_trigger.c |   7 ++-
 drivers/net/mlx5/mlx5_txq.c     |  51 +++++++++++++-----
 6 files changed, 167 insertions(+), 19 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bc4b6ba..7c0cd28 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <sys/mman.h>

 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -56,6 +57,7 @@
 #include
 #include
 #include
+#include <rte_eal_memconfig.h>
 #include

 #include "mlx5.h"
@@ -478,6 +480,106 @@

 static struct rte_pci_driver mlx5_driver;

+/*
+ * Reserved UAR address space for TXQ UAR (HW doorbell) mapping; a
+ * process-local resource shared by the primary and secondary processes
+ * to avoid a duplicate reservation.
+ * The space has to be available in both processes: TXQ UARs map into
+ * this area with a fixed mmap without double-checking it.
+ */
+static void *uar_base;
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_primary(struct priv *priv)
+{
+	void *addr = (void *)0;
+	int i;
+	const struct rte_mem_config *mcfg;
+	int ret;
+
+	if (uar_base) { /* UAR address space mapped. */
+		priv->uar_base = uar_base;
+		return 0;
+	}
+	/* Find out the lower bound of hugepage segments. */
+	mcfg = rte_eal_get_configuration()->mem_config;
+	for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) {
+		if (addr)
+			addr = RTE_MIN(addr, mcfg->memseg[i].addr);
+		else
+			addr = mcfg->memseg[i].addr;
+	}
+	/* Keep distance to hugepages to minimize potential conflicts. */
+	addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+	/* Anonymous mmap, no real memory consumption. */
+	addr = mmap(addr, MLX5_UAR_SIZE,
+		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ERROR("Failed to reserve UAR address space, please adjust "
+		      "MLX5_UAR_SIZE or try --base-virtaddr");
+		ret = ENOMEM;
+		return ret;
+	}
+	/* Accept either the same addr or a new addr returned from mmap
+	 * if the target range is occupied.
+	 */
+	INFO("Reserved UAR address space: %p", addr);
+	priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+	uar_base = addr; /* process local, don't reserve again. */
+	return 0;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with
+ * primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_secondary(struct priv *priv)
+{
+	void *addr;
+	int ret;
+
+	assert(priv->uar_base);
+	if (uar_base) { /* Already reserved. */
+		assert(uar_base == priv->uar_base);
+		return 0;
+	}
+	/* Anonymous mmap, no real memory consumption. */
+	addr = mmap(priv->uar_base, MLX5_UAR_SIZE,
+		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ERROR("UAR mmap failed: %p size: %llu",
+		      priv->uar_base, MLX5_UAR_SIZE);
+		ret = ENXIO;
+		return ret;
+	}
+	if (priv->uar_base != addr) {
+		ERROR("UAR address %p size %llu occupied, please adjust "
+		      "MLX5_UAR_OFFSET or try EAL parameter --base-virtaddr",
+		      priv->uar_base, MLX5_UAR_SIZE);
+		ret = ENXIO;
+		return ret;
+	}
+	uar_base = addr; /* process local, don't reserve again. */
+	INFO("Reserved UAR address space: %p", addr);
+	return 0;
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -660,6 +762,11 @@
 	eth_dev->device = &pci_dev->device;
 	eth_dev->dev_ops = &mlx5_dev_sec_ops;
 	priv = eth_dev->data->dev_private;
+	err = priv_uar_init_secondary(priv);
+	if (err < 0) {
+		err = -err;
+		goto error;
+	}
 	/* Receive command fd from primary process */
 	err = priv_socket_connect(priv);
 	if (err < 0) {
@@ -668,10 +775,8 @@
 	}
 	/* Remap UAR for Tx queues. */
 	err = priv_tx_uar_remap(priv, err);
-	if (err < 0) {
-		err = -err;
+	if (err)
 		goto error;
-	}
 	/*
 	 * Ethdev pointer is still required as input since
 	 * the primary device is not accessible from the
@@ -817,6 +922,9 @@
 		WARN("Rx CQE compression isn't supported");
 		config.cqe_comp = 0;
 	}
+	err = priv_uar_init_primary(priv);
+	if (err)
+		goto port_error;
 	/* Configure the first MAC address by default. */
 	if (priv_get_mac(priv, &mac.addr_bytes)) {
 		ERROR("cannot get MAC address, is mlx5_en loaded?"
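The secondary side above relies on a pattern worth calling out: because the
reservation mmap() is not MAP_FIXED, its first argument is only a hint, and
the code must compare the returned address with the one recorded by the
primary. A reduced sketch of that check (helper name is local to this
example, not driver code):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Try to reserve exactly [want, want + size); fail if the kernel
     * placed the mapping elsewhere, i.e. the range is already in use. */
    static int
    reserve_exact(void *want, size_t size)
    {
        void *got = mmap(want, size, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (got == MAP_FAILED)
            return -1;
        if (got != want) {
            munmap(got, size); /* Unlike the driver, drop the stray map. */
            return -1;
        }
        return 0;
    }

In the driver a mismatch is fatal for the secondary process, hence the
error message suggesting to adjust MLX5_UAR_OFFSET or use --base-virtaddr.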
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a7ec607..ab40914 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -180,6 +180,7 @@ struct priv {
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 	int primary_socket; /* Unix socket for primary process. */
+	void *uar_base; /* Reserved address space for UAR mapping. */
 	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
 	struct mlx5_dev_config config; /* Device configuration. */
 	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a71db28..7f6ae56 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -110,4 +110,14 @@
 /* Supported RSS */
 #define MLX5_RSS_HF_MASK (~(ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP))

+/* Reserved address space for UAR mapping. */
+#define MLX5_UAR_SIZE (1ULL << 32)
+
+/* Offset of the reserved UAR address space from the hugepage memory. The
+ * offset minimizes the chance that addresses next to the hugepages are
+ * used by other code in either the primary or the secondary process;
+ * failing to map a TX UAR would make TX packets invisible to the HW.
+ */
+#define MLX5_UAR_OFFSET (1ULL << 32)
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 2eb2f05..278bcf2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -205,7 +205,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register. */
+	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
@@ -230,6 +230,7 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
+	volatile void *bf_reg_orig; /* Blueflame register from Verbs. */
 };

 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 827db2e..54819ce 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -76,10 +76,13 @@
 			goto error;
 		}
 	}
-	return -ret;
+	ret = priv_tx_uar_remap(priv, priv->ctx->cmd_fd);
+	if (ret)
+		goto error;
+	return ret;
 error:
 	priv_txq_stop(priv);
-	return -ret;
+	return ret;
 }

 static void
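The mlx5_txq.c changes below revolve around one piece of address
arithmetic: the doorbell address returned by Verbs is split into its page
base and in-page offset, and the page base is relocated into the reserved
window modulo MLX5_UAR_SIZE. The same computation, standalone (function
name is local to this sketch):

    #include <stdint.h>
    #include <unistd.h>

    #define UAR_SIZE (1ULL << 32) /* Mirrors MLX5_UAR_SIZE. */

    static void *
    relocate_doorbell(void *uar_base, uintptr_t bf_reg_orig)
    {
        uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
        uintptr_t off = bf_reg_orig & (page_size - 1);   /* In-page offset. */
        uintptr_t page = bf_reg_orig & ~(page_size - 1); /* Page base. */
        /* Keeping the low bits of the page base means identical Verbs
         * pages land on identical targets inside the reserved window,
         * which is what makes the duplicate-page check below safe. */
        uintptr_t target = (uintptr_t)uar_base + (page & (UAR_SIZE - 1));

        return (void *)(target + off);
    }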
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 18321c3..65086d3 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,7 +288,9 @@

 /**
- * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both the primary and the secondary process mmap them so that the UAR
+ * addresses stay aligned between the two.
  *
  * @param[in] priv
  *   Pointer to private structure.
@@ -305,11 +307,14 @@
 	uintptr_t pages[priv->txqs_n];
 	unsigned int pages_n = 0;
 	uintptr_t uar_va;
+	uintptr_t off;
 	void *addr;
+	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	int r;

 	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
 	/*
@@ -320,8 +325,10 @@
 	for (i = 0; i != priv->txqs_n; ++i) {
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+		/* UAR addr from Verbs, used to find dups and in-page offset. */
+		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
+		off = uar_va & (page_size - 1); /* Offset in page. */
+		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* Page address. */
 		already_mapped = 0;
 		for (j = 0; j != pages_n; ++j) {
 			if (pages[j] == uar_va) {
@@ -329,16 +336,30 @@
 				break;
 			}
 		}
-		if (already_mapped)
-			continue;
-		pages[pages_n++] = uar_va;
-		addr = mmap((void *)uar_va, page_size,
-			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-			    txq_ctrl->uar_mmap_offset);
-		if (addr != (void *)uar_va) {
-			ERROR("call to mmap failed on UAR for txq %d\n", i);
-			return -1;
+		/* New address in the reserved UAR address space. */
+		addr = RTE_PTR_ADD(priv->uar_base,
+				   uar_va & (MLX5_UAR_SIZE - 1));
+		if (!already_mapped) {
+			pages[pages_n++] = uar_va;
+			/* Fixed mmap to the specified address in the
+			 * reserved address space.
+			 */
+			ret = mmap(addr, page_size,
+				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+				   txq_ctrl->uar_mmap_offset);
+			if (ret != addr) {
+				/* Fixed mmap has to return same address. */
+				ERROR("call to mmap failed on UAR for txq %d\n",
+				      i);
+				r = ENXIO;
+				return r;
+			}
 		}
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Save once. */
+			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
+		else
+			assert(txq_ctrl->txq.bf_reg ==
+			       RTE_PTR_ADD((void *)addr, off));
 	}
 	return 0;
 }
@@ -505,7 +526,7 @@ struct mlx5_txq_ibv*
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_data->bf_reg = qp.bf.reg;
+	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -840,6 +861,7 @@ struct mlx5_txq_ctrl*
 {
 	unsigned int i;
 	struct mlx5_txq_ctrl *txq;
+	size_t page_size = sysconf(_SC_PAGESIZE);

 	if (!(*priv->txqs)[idx])
 		return 0;
@@ -859,6 +881,9 @@ struct mlx5_txq_ctrl*
 			txq->txq.mp2mr[i] = NULL;
 		}
 	}
+	if (priv->uar_base)
+		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
+		       page_size), page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		LIST_REMOVE(txq, next);
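For completeness, the release-path munmap() in the last hunk undoes exactly
one remapped doorbell page; written out on its own (assuming bf_reg points
into a page-granular mapping, as it does after priv_tx_uar_remap()):

    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void
    unmap_doorbell_page(volatile void *bf_reg)
    {
        uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
        /* Floor the register address to its page before unmapping. */
        void *page = (void *)((uintptr_t)bf_reg & ~(page_size - 1));

        munmap(page, page_size);
    }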