From patchwork Tue Dec 19 11:14:41 2017
X-Patchwork-Submitter: "Burakov, Anatoly"
X-Patchwork-Id: 32460
From: Anatoly Burakov
To: dev@dpdk.org
Cc: andras.kovacs@ericsson.com, laszlo.vadkeri@ericsson.com,
 keith.wiles@intel.com, benjamin.walker@intel.com,
 bruce.richardson@intel.com, thomas@monjalon.net
Date: Tue, 19 Dec 2017 11:14:41 +0000
Subject: [dpdk-dev] [RFC v2 14/23] eal: add support for dynamic unmapping of
 pages

This isn't used anywhere yet, but the support is now there. Also, add
cleanup to the allocation procedures, so that if we fail to allocate
everything we asked for, we can free all of it back.
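A rough usage sketch (hypothetical caller code, not part of this patch;
assumes a 2 MB hugepage size and socket 0) of how the new API pairs with
the existing allocator:

	/* allocate one page dynamically, then hand it back */
	struct rte_memseg *ms = eal_memalloc_alloc_page(RTE_PGSIZE_2M, 0);
	if (ms != NULL) {
		/* ... use ms->addr ... */

		/* unmaps the page and punches a hole in (or unlinks)
		 * its hugetlbfs file */
		if (eal_memalloc_free_page(ms) < 0)
			RTE_LOG(ERR, EAL, "cannot free page\n");
	}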
Signed-off-by: Anatoly Burakov
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 131 ++++++++++++++++++++++++++++-
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index 59fd330..47e4367 100755
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -44,4 +44,7 @@ int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 527c2f6..13172a0 100755
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -109,6 +109,18 @@ huge_recover_sigbus(void)
 	}
 }
 
+/*
+ * uses fstat to check whether the file takes up any space on disk
+ */
+static bool
+is_zero_length(int fd)
+{
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		return false;
+	return st.st_blocks == 0;
+}
+
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 static bool
 prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) {
@@ -267,6 +279,61 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return ret;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi, unsigned list_idx,
+		unsigned seg_idx) {
+	uint64_t fa_offset;
+	char path[PATH_MAX];
+	int fd;
+
+	fa_offset = seg_idx * ms->hugepage_sz;
+
+	if (internal_config.single_file_segments) {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir, list_idx);
+	} else {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir,
+				list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+	}
+
+	munmap(ms->addr, ms->hugepage_sz);
+
+	// TODO: race condition?
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+			MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	if (internal_config.single_file_segments) {
+		/* now, truncate or remove the original file */
+		fd = open(path, O_RDWR, 0600);
+		if (fd < 0) {
+			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			// TODO: proper error handling
+			return -1;
+		}
+
+		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				fa_offset, ms->hugepage_sz)) {
+			RTE_LOG(DEBUG, EAL, "Page deallocation failed: %s\n",
+					strerror(errno));
+		}
+		if (is_zero_length(fd)) {
+			unlink(path);
+		}
+		close(fd);
+	} else {
+		unlink(path);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return 0;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact) {
@@ -274,7 +341,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned msl_idx;
-	int cur_idx, next_idx, end_idx, i, ret = 0;
+	int cur_idx, next_idx, start_idx, end_idx, i, j, ret = 0;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -366,6 +433,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	}
 
 	end_idx = cur_idx + n;
+	start_idx = cur_idx;
 
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	have_numa = prepare_numa(&oldpolicy, oldmask, socket);
@@ -387,6 +455,20 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 			ret = i;
 			goto restore_numa;
 		}
+		RTE_LOG(DEBUG, EAL, "exact amount of pages was requested, so freeing %i allocated pages\n",
+			i);
+
+		/* clean up */
+		for (j = start_idx; j < cur_idx; j++) {
+			struct rte_memseg *tmp;
+			struct rte_fbarray *arr = &msl->memseg_arr;
+
+			tmp = rte_fbarray_get(arr, j);
+			if (free_page(tmp, hi, msl_idx, j))
+				rte_panic("Cannot free page\n");
+
+			rte_fbarray_set_used(arr, j, false);
+		}
 		if (ms)
 			memset(ms, 0, sizeof(struct rte_memseg*) * n);
 		ret = -1;
@@ -414,3 +496,50 @@ eal_memalloc_alloc_page(uint64_t size, int socket) {
 		return NULL;
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms) {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (int i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr +
+				cur->memseg_arr.capacity * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_used(&msl->memseg_arr, seg_idx, false);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
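
For reference, the segment lookup in eal_memalloc_free_page() is plain
address arithmetic over the owning memseg list. A minimal standalone
sketch of the same computation (made-up example values, simplified
types, assumes a 64-bit build):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* hypothetical memseg list: 512 segments of 2 MB at base_va */
		uintptr_t base_va = 0x140000000;
		uint64_t hugepage_sz = 0x200000;
		unsigned capacity = 512;
		uintptr_t addr = 0x140a00000;	/* page being freed */

		uintptr_t end = base_va + capacity * hugepage_sz;
		if (addr < base_va || addr >= end) {
			puts("address not in this memseg list");
			return 1;
		}
		/* equivalent of RTE_PTR_DIFF(addr, base_va) / hugepage_sz */
		unsigned seg_idx = (addr - base_va) / hugepage_sz;
		printf("seg_idx = %u\n", seg_idx);	/* prints 5 */
		return 0;
	}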