From patchwork Thu Nov 29 13:48:32 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anatoly Burakov X-Patchwork-Id: 48407 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 907461B4CB; Thu, 29 Nov 2018 14:48:42 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 0E53F1B490 for ; Thu, 29 Nov 2018 14:48:38 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 29 Nov 2018 05:48:38 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,294,1539673200"; d="scan'208";a="113540437" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga002.jf.intel.com with ESMTP; 29 Nov 2018 05:48:36 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wATDmZAQ012498; Thu, 29 Nov 2018 13:48:35 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wATDmZda019971; Thu, 29 Nov 2018 13:48:35 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wATDmZ7k019967; Thu, 29 Nov 2018 13:48:35 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: shahafs@mellanox.com, yskoh@mellanox.com, thomas@monjalon.net, shreyansh.jain@nxp.com Date: Thu, 29 Nov 2018 13:48:32 +0000 Message-Id: X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 1/4] malloc: separate creating memseg list and malloc heap X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, creating external malloc heap involves also creating a memseg list backing that malloc heap. We need to have them as separate functions, to allow creating memseg lists without creating a malloc heap. Signed-off-by: Anatoly Burakov Acked-by: Yongseok Koh --- lib/librte_eal/common/malloc_heap.c | 34 ++++++++++++++++++----------- lib/librte_eal/common/malloc_heap.h | 9 ++++++-- lib/librte_eal/common/rte_malloc.c | 11 ++++++++-- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index c6a6d4f6b..25693481f 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -1095,9 +1095,10 @@ destroy_seg(struct malloc_elem *elem, size_t len) return 0; } -int -malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr, - rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz) +struct rte_memseg_list * +malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz, const char *seg_name, + unsigned int socket_id) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; char fbarray_name[RTE_FBARRAY_NAME_LEN]; @@ -1117,17 +1118,17 @@ malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr, if (msl == NULL) { RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n"); rte_errno = ENOSPC; - return -1; + return NULL; } snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p", - heap->name, va_addr); + seg_name, va_addr); /* create the backing fbarray */ if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages, sizeof(struct rte_memseg)) < 0) { RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n"); - return -1; + return NULL; } arr = &msl->memseg_arr; @@ -1143,32 +1144,39 @@ malloc_heap_add_external_memory(struct malloc_heap *heap, void 
*va_addr, ms->len = page_sz; ms->nchannel = rte_memory_get_nchannel(); ms->nrank = rte_memory_get_nrank(); - ms->socket_id = heap->socket_id; + ms->socket_id = socket_id; } /* set up the memseg list */ msl->base_va = va_addr; msl->page_sz = page_sz; - msl->socket_id = heap->socket_id; + msl->socket_id = socket_id; msl->len = seg_len; msl->version = 0; msl->external = 1; + return msl; +} + +int +malloc_heap_add_external_memory(struct malloc_heap *heap, + struct rte_memseg_list *msl) +{ /* erase contents of new memory */ - memset(va_addr, 0, seg_len); + memset(msl->base_va, 0, msl->len); /* now, add newly minted memory to the malloc heap */ - malloc_heap_add_memory(heap, msl, va_addr, seg_len); + malloc_heap_add_memory(heap, msl, msl->base_va, msl->len); - heap->total_size += seg_len; + heap->total_size += msl->len; /* all done! */ RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n", - heap->name, va_addr); + heap->name, msl->base_va); /* notify all subscribers that a new memory area has been added */ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, - va_addr, seg_len); + msl->base_va, msl->len); return 0; } diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index e48996d52..255a315b8 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -39,9 +39,14 @@ malloc_heap_create(struct malloc_heap *heap, const char *heap_name); int malloc_heap_destroy(struct malloc_heap *heap); +struct rte_memseg_list * +malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz, const char *seg_name, + unsigned int socket_id); + int -malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr, - rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz); +malloc_heap_add_external_memory(struct malloc_heap *heap, + struct rte_memseg_list *msl); int malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, diff --git 
a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 0da5ad5e8..66bfe63c3 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -340,6 +340,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct malloc_heap *heap = NULL; + struct rte_memseg_list *msl; unsigned int n; int ret; @@ -373,9 +374,15 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, goto unlock; } + msl = malloc_heap_create_external_seg(va_addr, iova_addrs, n, page_sz, + heap_name, heap->socket_id); + if (msl == NULL) { + ret = -1; + goto unlock; + } + rte_spinlock_lock(&heap->lock); - ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n, - page_sz); + ret = malloc_heap_add_external_memory(heap, msl); rte_spinlock_unlock(&heap->lock); unlock: From patchwork Thu Nov 29 13:48:33 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anatoly Burakov X-Patchwork-Id: 48406 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 1D3521B4AA; Thu, 29 Nov 2018 14:48:40 +0100 (CET) Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by dpdk.org (Postfix) with ESMTP id EE4F5322C for ; Thu, 29 Nov 2018 14:48:38 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga104.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 29 Nov 2018 05:48:37 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,294,1539673200"; d="scan'208";a="116348114" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga004.fm.intel.com with ESMTP; 29 Nov 2018 05:48:36 -0800 Received: from 
sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wATDmaF1012502; Thu, 29 Nov 2018 13:48:36 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wATDmZN7019978; Thu, 29 Nov 2018 13:48:35 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wATDmZMA019974; Thu, 29 Nov 2018 13:48:35 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: shahafs@mellanox.com, yskoh@mellanox.com, thomas@monjalon.net, shreyansh.jain@nxp.com Date: Thu, 29 Nov 2018 13:48:33 +0000 Message-Id: <54cb382e91119cfbef7c34db4406f8ea39428511.1543495935.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 2/4] malloc: separate destroying memseg list and heap data X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, destroying an external heap chunk and its memseg list is part of one process. Once we gain the ability to unregister external memory from DPDK that doesn't have any heap structures associated with it, we will need to be able to find and destroy memseg lists and heap data separately. 
Signed-off-by: Anatoly Burakov Acked-by: Yongseok Koh --- lib/librte_eal/common/malloc_heap.c | 70 +++++++++++++++---- lib/librte_eal/common/malloc_heap.h | 6 ++ lib/librte_eal/common/rte_malloc.c | 104 ++++++++++------------------ 3 files changed, 102 insertions(+), 78 deletions(-) diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 25693481f..fa0cb0799 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -1067,12 +1067,9 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f) } static int -destroy_seg(struct malloc_elem *elem, size_t len) +destroy_elem(struct malloc_elem *elem, size_t len) { struct malloc_heap *heap = elem->heap; - struct rte_memseg_list *msl; - - msl = elem->msl; /* notify all subscribers that a memory area is going to be removed */ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len); @@ -1085,13 +1082,6 @@ destroy_seg(struct malloc_elem *elem, size_t len) memset(elem, 0, sizeof(*elem)); - /* destroy the fbarray backing this memory */ - if (rte_fbarray_destroy(&msl->memseg_arr) < 0) - return -1; - - /* reset the memseg list */ - memset(msl, 0, sizeof(*msl)); - return 0; } @@ -1158,6 +1148,62 @@ malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], return msl; } +struct extseg_walk_arg { + void *va_addr; + size_t len; + struct rte_memseg_list *msl; +}; + +static int +extseg_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct extseg_walk_arg *wa = arg; + + if (msl->base_va == wa->va_addr && msl->len == wa->len) { + unsigned int found_idx; + + /* msl is const */ + found_idx = msl - mcfg->memsegs; + wa->msl = &mcfg->memsegs[found_idx]; + return 1; + } + return 0; +} + +struct rte_memseg_list * +malloc_heap_find_external_seg(void *va_addr, size_t len) +{ + struct extseg_walk_arg wa; + int res; + + wa.va_addr = va_addr; + wa.len = len; + + res = 
rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa); + + if (res != 1) { + /* 0 means nothing was found, -1 shouldn't happen */ + if (res == 0) + rte_errno = ENOENT; + return NULL; + } + return wa.msl; +} + +int +malloc_heap_destroy_external_seg(struct rte_memseg_list *msl) +{ + /* destroy the fbarray backing this memory */ + if (rte_fbarray_destroy(&msl->memseg_arr) < 0) + return -1; + + /* reset the memseg list */ + memset(msl, 0, sizeof(*msl)); + + return 0; +} + int malloc_heap_add_external_memory(struct malloc_heap *heap, struct rte_memseg_list *msl) @@ -1206,7 +1252,7 @@ malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, rte_errno = EBUSY; return -1; } - return destroy_seg(elem, len); + return destroy_elem(elem, len); } int diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index 255a315b8..ca9ff666f 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -44,6 +44,12 @@ malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz, const char *seg_name, unsigned int socket_id); +struct rte_memseg_list * +malloc_heap_find_external_seg(void *va_addr, size_t len); + +int +malloc_heap_destroy_external_seg(struct rte_memseg_list *msl); + int malloc_heap_add_external_memory(struct malloc_heap *heap, struct rte_memseg_list *msl); diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 66bfe63c3..9a82e3386 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -396,6 +396,7 @@ rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct malloc_heap *heap = NULL; + struct rte_memseg_list *msl; int ret; if (heap_name == NULL || va_addr == NULL || len == 0 || @@ -420,9 +421,19 @@ rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t 
len) goto unlock; } + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + ret = -1; + goto unlock; + } + rte_spinlock_lock(&heap->lock); ret = malloc_heap_remove_external_memory(heap, va_addr, len); rte_spinlock_unlock(&heap->lock); + if (ret != 0) + goto unlock; + + ret = malloc_heap_destroy_external_seg(msl); unlock: rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); @@ -430,63 +441,12 @@ rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len) return ret; } -struct sync_mem_walk_arg { - void *va_addr; - size_t len; - int result; - bool attach; -}; - -static int -sync_mem_walk(const struct rte_memseg_list *msl, void *arg) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct sync_mem_walk_arg *wa = arg; - size_t len = msl->page_sz * msl->memseg_arr.len; - - if (msl->base_va == wa->va_addr && - len == wa->len) { - struct rte_memseg_list *found_msl; - int msl_idx, ret; - - /* msl is const */ - msl_idx = msl - mcfg->memsegs; - found_msl = &mcfg->memsegs[msl_idx]; - - if (wa->attach) { - ret = rte_fbarray_attach(&found_msl->memseg_arr); - } else { - /* notify all subscribers that a memory area is about to - * be removed - */ - eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, - msl->base_va, msl->len); - ret = rte_fbarray_detach(&found_msl->memseg_arr); - } - - if (ret < 0) { - wa->result = -rte_errno; - } else { - /* notify all subscribers that a new memory area was - * added - */ - if (wa->attach) - eal_memalloc_mem_event_notify( - RTE_MEM_EVENT_ALLOC, - msl->base_va, msl->len); - wa->result = 0; - } - return 1; - } - return 0; -} - static int sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct malloc_heap *heap = NULL; - struct sync_mem_walk_arg wa; + struct rte_memseg_list *msl; int ret; if (heap_name == NULL || va_addr == NULL || len == 0 || @@ -513,23 +473,35 @@ sync_memory(const 
char *heap_name, void *va_addr, size_t len, bool attach) } /* find corresponding memseg list to sync to */ - wa.va_addr = va_addr; - wa.len = len; - wa.result = -ENOENT; /* fail unless explicitly told to succeed */ - wa.attach = attach; - - /* we're already holding a read lock */ - rte_memseg_list_walk_thread_unsafe(sync_mem_walk, &wa); - - if (wa.result < 0) { - rte_errno = -wa.result; + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { ret = -1; - } else { - /* notify all subscribers that a new memory area was added */ - if (attach) + goto unlock; + } + + if (attach) { + ret = rte_fbarray_attach(&msl->memseg_arr); + if (ret == 0) { + /* notify all subscribers that a new memory area was + * added. + */ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, va_addr, len); - ret = 0; + } else { + ret = -1; + goto unlock; + } + } else { + /* notify all subscribers that a memory area is about to + * be removed. + */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + msl->base_va, msl->len); + ret = rte_fbarray_detach(&msl->memseg_arr); + if (ret < 0) { + ret = -1; + goto unlock; + } } unlock: rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); From patchwork Thu Nov 29 13:48:34 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anatoly Burakov X-Patchwork-Id: 48408 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 45A1F1B4D3; Thu, 29 Nov 2018 14:48:45 +0100 (CET) Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by dpdk.org (Postfix) with ESMTP id 25E4E1B4AA for ; Thu, 29 Nov 2018 14:48:38 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by fmsmga106.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 29 Nov 
2018 05:48:38 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,294,1539673200"; d="scan'208";a="97551168" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga008.fm.intel.com with ESMTP; 29 Nov 2018 05:48:36 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wATDma39012505; Thu, 29 Nov 2018 13:48:36 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wATDmaSj019992; Thu, 29 Nov 2018 13:48:36 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wATDmaqF019985; Thu, 29 Nov 2018 13:48:36 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: John McNamara , Marko Kovacevic , shahafs@mellanox.com, yskoh@mellanox.com, thomas@monjalon.net, shreyansh.jain@nxp.com Date: Thu, 29 Nov 2018 13:48:34 +0000 Message-Id: X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 3/4] mem: allow registering external memory areas X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The general use-case of using external memory is well covered by existing external memory API's. However, certain use cases require manual management of externally allocated memory areas, so this memory should not be added to the heap. It should, however, be added to DPDK's internal structures, so that API's like ``rte_virt2memseg`` would work on such external memory segments. This commit adds such an API to DPDK, along with documentation for it. The new functions allow registering and unregistering externally allocated memory areas. 
Signed-off-by: Anatoly Burakov Acked-by: Yongseok Koh --- .../prog_guide/env_abstraction_layer.rst | 60 ++++++++++++--- lib/librte_eal/common/eal_common_memory.c | 74 +++++++++++++++++++ lib/librte_eal/common/include/rte_memory.h | 63 ++++++++++++++++ lib/librte_eal/rte_eal_version.map | 2 + 4 files changed, 189 insertions(+), 10 deletions(-) diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst index 8b5d050c7..d7799b626 100644 --- a/doc/guides/prog_guide/env_abstraction_layer.rst +++ b/doc/guides/prog_guide/env_abstraction_layer.rst @@ -212,17 +212,26 @@ Normally, these options do not need to be changed. Support for Externally Allocated Memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is possible to use externally allocated memory in DPDK, using a set of malloc -heap API's. Support for externally allocated memory is implemented through -overloading the socket ID - externally allocated heaps will have socket ID's -that would be considered invalid under normal circumstances. Requesting an -allocation to take place from a specified externally allocated memory is a -matter of supplying the correct socket ID to DPDK allocator, either directly -(e.g. through a call to ``rte_malloc``) or indirectly (through data -structure-specific allocation API's such as ``rte_ring_create``). +It is possible to use externally allocated memory in DPDK. There are two ways in +which using externally allocated memory can work: the malloc heap API's, and +manual memory management. -Since there is no way DPDK can verify whether memory are is available or valid, -this responsibility falls on the shoulders of the user. All multiprocess ++ Using heap API's for externally allocated memory + +Using a set of malloc heap API's is the recommended way to use externally +allocated memory in DPDK. 
In this way, support for externally allocated memory +is implemented through overloading the socket ID - externally allocated heaps +will have socket ID's that would be considered invalid under normal +circumstances. Requesting an allocation to take place from a specified +externally allocated memory is a matter of supplying the correct socket ID to +DPDK allocator, either directly (e.g. through a call to ``rte_malloc``) or +indirectly (through data structure-specific allocation API's such as +``rte_ring_create``). Using these API's also ensures that mapping of externally +allocated memory for DMA is performed on any memory segment that is added +to a DPDK malloc heap. + +Since there is no way DPDK can verify whether memory is available or valid, this +responsibility falls on the shoulders of the user. All multiprocess synchronization is also user's responsibility, as well as ensuring that all calls to add/attach/detach/remove memory are done in the correct order. It is not required to attach to a memory area in all processes - only attach to memory @@ -246,6 +255,37 @@ The expected workflow is as follows: For more information, please refer to ``rte_malloc`` API documentation, specifically the ``rte_malloc_heap_*`` family of function calls. ++ Using externally allocated memory without DPDK API's + +While using heap API's is the recommended method of using externally allocated +memory in DPDK, there are certain use cases where the overhead of DPDK heap API +is undesirable - for example, when manual memory management is performed on an +externally allocated area. To support use cases where externally allocated +memory will not be used as part of normal DPDK workflow, there is also another +set of API's under the ``rte_extmem_*`` namespace. + +These API's are (as their name implies) intended to allow registering or +unregistering externally allocated memory to/from DPDK's internal page table, to +allow API's like ``rte_virt2memseg`` etc. 
to work with externally allocated +memory. Memory added this way will not be available for any regular DPDK +allocators; DPDK will leave this memory for the user application to manage. + +The expected workflow is as follows: + +* Get a pointer to memory area +* Register memory within DPDK + - If IOVA table is not specified, IOVA addresses will be assumed to be + unavailable +* Perform DMA mapping with ``rte_vfio_dma_map`` if needed +* Use the memory area in your application +* If memory area is no longer needed, it can be unregistered + - If the area was mapped for DMA, unmapping must be performed before + unregistering memory + +Since these externally allocated memory areas will not be managed by DPDK, it is +therefore up to the user application to decide how to use them and what to do +with them once they're registered. + Per-lcore and Shared Variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index d47ea4938..a2e085ae8 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -24,6 +24,7 @@ #include "eal_memalloc.h" #include "eal_private.h" #include "eal_internal_cfg.h" +#include "malloc_heap.h" /* * Try to mmap *size bytes in /dev/zero. 
If it is successful, return the @@ -775,6 +776,79 @@ rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset) return ret; } +int __rte_experimental +rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int socket_id; + int ret = 0; + + if (va_addr == NULL || page_sz == 0 || len == 0 || + !rte_is_power_of_2(page_sz) || + RTE_ALIGN(len, page_sz) != len) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* make sure the segment doesn't already exist */ + if (malloc_heap_find_external_seg(va_addr, len) != NULL) { + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + + /* get next available socket ID */ + socket_id = mcfg->next_socket_id; + if (socket_id > INT32_MAX) { + RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); + rte_errno = ENOSPC; + ret = -1; + goto unlock; + } + + /* we can create a new memseg */ + if (malloc_heap_create_external_seg(va_addr, iova_addrs, n_pages, + page_sz, "extmem", socket_id) == NULL) { + ret = -1; + goto unlock; + } + + /* memseg list successfully created - increment next socket ID */ + mcfg->next_socket_id++; +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + return ret; +} + +int __rte_experimental +rte_extmem_unregister(void *va_addr, size_t len) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + int ret = 0; + + if (va_addr == NULL || len == 0) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* find our segment */ + msl = malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + + ret = malloc_heap_destroy_external_seg(msl); +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + return ret; +} + /* init memory subsystem */ int 
rte_eal_memory_init(void) diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index d970825df..4a43c1a9e 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -423,6 +423,69 @@ int __rte_experimental rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, size_t *offset); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register external memory chunk with DPDK. + * + * @note Using this API is mutually exclusive with ``rte_malloc`` family of + * API's. + * + * @note This API will not perform any DMA mapping. It is expected that user + * will do that themselves. + * + * @param va_addr + * Start of virtual area to register + * @param len + * Length of virtual area to register + * @param iova_addrs + * Array of page IOVA addresses corresponding to each page in this memory + * area. Can be NULL, in which case page IOVA addresses will be set to + * RTE_BAD_IOVA. + * @param n_pages + * Number of elements in the iova_addrs array. Ignored if ``iova_addrs`` + * is NULL. + * @param page_sz + * Page size of the underlying memory + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * EEXIST - memory chunk is already registered + * ENOSPC - no more space in internal config to store a new memory chunk + */ +int __rte_experimental +rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], + unsigned int n_pages, size_t page_sz); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Unregister external memory chunk with DPDK. + * + * @note Using this API is mutually exclusive with ``rte_malloc`` family of + * API's. + * + * @note This API will not perform any DMA unmapping. It is expected that user + * will do that themselves. 
+ * + * @param va_addr + * Start of virtual area to unregister + * @param len + * Length of virtual area to unregister + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * ENOENT - memory chunk was not found + */ +int __rte_experimental +rte_extmem_unregister(void *va_addr, size_t len); + /** * Dump the physical memory layout to a file. * diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 3fe78260d..593691a14 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -296,6 +296,8 @@ EXPERIMENTAL { rte_devargs_remove; rte_devargs_type_count; rte_eal_cleanup; + rte_extmem_register; + rte_extmem_unregister; rte_fbarray_attach; rte_fbarray_destroy; rte_fbarray_detach; From patchwork Thu Nov 29 13:48:35 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anatoly Burakov X-Patchwork-Id: 48409 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 5D02E1B4E1; Thu, 29 Nov 2018 14:48:47 +0100 (CET) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 921BF1B4B3 for ; Thu, 29 Nov 2018 14:48:39 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 29 Nov 2018 05:48:38 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,294,1539673200"; d="scan'208";a="96751622" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga008.jf.intel.com with ESMTP; 29 Nov 2018 05:48:36 -0800 Received: from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com [10.237.217.45]) by 
irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id wATDmajG012510; Thu, 29 Nov 2018 13:48:36 GMT Received: from sivswdev01.ir.intel.com (localhost [127.0.0.1]) by sivswdev01.ir.intel.com with ESMTP id wATDma4A019999; Thu, 29 Nov 2018 13:48:36 GMT Received: (from aburakov@localhost) by sivswdev01.ir.intel.com with LOCAL id wATDmaRb019995; Thu, 29 Nov 2018 13:48:36 GMT From: Anatoly Burakov To: dev@dpdk.org Cc: John McNamara , Marko Kovacevic , shahafs@mellanox.com, yskoh@mellanox.com, thomas@monjalon.net, shreyansh.jain@nxp.com Date: Thu, 29 Nov 2018 13:48:35 +0000 Message-Id: X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 4/4] mem: allow usage of non-heap external memory in multiprocess X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add multiprocess support for externally allocated memory areas that are not added to DPDK heap (and add relevant doc sections). 
Signed-off-by: Anatoly Burakov Acked-by: Yongseok Koh --- .../prog_guide/env_abstraction_layer.rst | 3 + lib/librte_eal/common/eal_common_memory.c | 42 +++++++++++++ lib/librte_eal/common/include/rte_memory.h | 59 +++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 2 + 4 files changed, 106 insertions(+) diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst index d7799b626..b0491bf2d 100644 --- a/doc/guides/prog_guide/env_abstraction_layer.rst +++ b/doc/guides/prog_guide/env_abstraction_layer.rst @@ -276,11 +276,14 @@ The expected workflow is as follows: * Register memory within DPDK - If IOVA table is not specified, IOVA addresses will be assumed to be unavailable + - Other processes must attach to the memory area before they can use it * Perform DMA mapping with ``rte_vfio_dma_map`` if needed * Use the memory area in your application * If memory area is no longer needed, it can be unregistered - If the area was mapped for DMA, unmapping must be performed before unregistering memory + - Other processes must detach from the memory area before it can be + unregistered Since these externally allocated memory areas will not be managed by DPDK, it is therefore up to the user application to decide how to use them and what to do diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index a2e085ae8..67b445c31 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -849,6 +849,48 @@ rte_extmem_unregister(void *va_addr, size_t len) return ret; } +static int +sync_memory(void *va_addr, size_t len, bool attach) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + int ret = 0; + + if (va_addr == NULL || len == 0) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* find our segment */ + msl = 
malloc_heap_find_external_seg(va_addr, len); + if (msl == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (attach) + ret = rte_fbarray_attach(&msl->memseg_arr); + else + ret = rte_fbarray_detach(&msl->memseg_arr); + +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + return ret; +} + +int __rte_experimental +rte_extmem_attach(void *va_addr, size_t len) +{ + return sync_memory(va_addr, len, true); +} + +int __rte_experimental +rte_extmem_detach(void *va_addr, size_t len) +{ + return sync_memory(va_addr, len, false); +} + /* init memory subsystem */ int rte_eal_memory_init(void) diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 4a43c1a9e..050bb6d8e 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -435,6 +435,10 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, * @note This API will not perform any DMA mapping. It is expected that user * will do that themselves. * + * @note Before accessing this memory in other processes, it needs to be + * attached in each of those processes by calling ``rte_extmem_attach`` in + * each other process. + * * @param va_addr * Start of virtual area to register * @param len @@ -472,6 +476,9 @@ rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], * @note This API will not perform any DMA unmapping. It is expected that user * will do that themselves. * + * @note Before calling this function, all other processes must call + * ``rte_extmem_detach`` to detach from the memory area. 
+ * * @param va_addr * Start of virtual area to unregister * @param len @@ -486,6 +493,58 @@ rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[], int __rte_experimental rte_extmem_unregister(void *va_addr, size_t len); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Attach to external memory chunk registered in another process. + * + * @note Using this API is mutually exclusive with ``rte_malloc`` family of + * API's. + * + * @note This API will not perform any DMA mapping. It is expected that user + * will do that themselves. + * + * @param va_addr + * Start of virtual area to attach to + * @param len + * Length of virtual area to attach to + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * ENOENT - memory chunk was not found + */ +int __rte_experimental +rte_extmem_attach(void *va_addr, size_t len); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Detach from external memory chunk registered in another process. + * + * @note Using this API is mutually exclusive with ``rte_malloc`` family of + * API's. + * + * @note This API will not perform any DMA unmapping. It is expected that user + * will do that themselves. + * + * @param va_addr + * Start of virtual area to detach from + * @param len + * Length of virtual area to detach from + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * ENOENT - memory chunk was not found + */ +int __rte_experimental +rte_extmem_detach(void *va_addr, size_t len); + /** * Dump the physical memory layout to a file. 
* diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 593691a14..eb5f7b9cb 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -296,6 +296,8 @@ EXPERIMENTAL { rte_devargs_remove; rte_devargs_type_count; rte_eal_cleanup; + rte_extmem_attach; + rte_extmem_detach; rte_extmem_register; rte_extmem_unregister; rte_fbarray_attach;