From patchwork Tue Feb 9 17:30:06 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wiles, Keith" X-Patchwork-Id: 10445 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id B32AB2E83; Tue, 9 Feb 2016 18:30:42 +0100 (CET) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id C139C1396 for ; Tue, 9 Feb 2016 18:30:40 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga103.fm.intel.com with ESMTP; 09 Feb 2016 09:30:39 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,421,1449561600"; d="scan'208";a="911723324" Received: from bvanipen-mobl1.amr.corp.intel.com ([10.254.75.40]) by fmsmga002.fm.intel.com with ESMTP; 09 Feb 2016 09:30:38 -0800 From: Keith Wiles To: dev@dpdk.org Date: Tue, 9 Feb 2016 11:30:06 -0600 Message-Id: <1455039006-86816-1-git-send-email-keith.wiles@intel.com> X-Mailer: git-send-email 2.5.4 (Apple Git-61) In-Reply-To: <1454454177-26743-1-git-send-email-keith.wiles@intel.com> References: <1454454177-26743-1-git-send-email-keith.wiles@intel.com> In-Reply-To: <1454454177-26743-1-git-send-email-keith.wiles@intel.com> References: <1454454177-26743-1-git-send-email-keith.wiles@intel.com> Subject: [dpdk-dev] [PATCH v2] mempool: reduce rte_mempool structure size X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Patch v2 to add some comments and setup for RTE_NEXT_ABI changes. The rte_mempool structure is changed, which will cause an ABI change for this structure. Providing backward compat is not reasonable here as this structure is used in multiple defines/inlines. 
Allow mempool cache support to be dynamic depending on whether the mempool being created needs cache support. Saves about 1.5M of memory used by the rte_mempool structure. Allocating small mempools which do not require cache can consume large amounts of memory if you have a number of these mempools. Signed-off-by: Keith Wiles --- app/test/test_mempool.c | 5 ++ config/defconfig_x86_64-native-linuxapp-gcc | 5 ++ lib/librte_mempool/rte_mempool.c | 83 ++++++++++++++++++++++++++--- lib/librte_mempool/rte_mempool.h | 57 +++++++++++++++++++- 4 files changed, 143 insertions(+), 7 deletions(-) diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c index 72f8fb6..2829d40 100644 --- a/app/test/test_mempool.c +++ b/app/test/test_mempool.c @@ -122,8 +122,13 @@ test_mempool_basic(void) return -1; printf("get private data\n"); +#ifdef RTE_NEXT_ABI + if (rte_mempool_get_priv(mp) != (char *)mp + + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size)) +#else if (rte_mempool_get_priv(mp) != (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num)) +#endif return -1; printf("get physical address of an object\n"); diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc index 60baf5b..02e9ace 100644 --- a/config/defconfig_x86_64-native-linuxapp-gcc +++ b/config/defconfig_x86_64-native-linuxapp-gcc @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y CONFIG_RTE_TOOLCHAIN="gcc" CONFIG_RTE_TOOLCHAIN_GCC=y +CONFIG_RTE_BUILD_SHARED_LIB=y +CONFIG_RTE_NEXT_ABI=n +CONFIG_RTE_EAL_IGB_UIO=n +CONFIG_RTE_LIBRTE_KNI=n +CONFIG_RTE_KNI_KMOD=n diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index aff5f6d..c61dc44 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -452,12 +452,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, /* compilation-time checks */ RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) & RTE_CACHE_LINE_MASK) != 0); +#ifdef RTE_NEXT_ABI + 
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & + RTE_CACHE_LINE_MASK) != 0); +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & RTE_CACHE_LINE_MASK) != 0); RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) & RTE_CACHE_LINE_MASK) != 0); #endif +#endif /* RTE_NEXT_ABI */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) & RTE_CACHE_LINE_MASK) != 0); @@ -527,9 +532,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, */ int head = sizeof(struct rte_mempool); int new_size = (private_data_size + head) % page_size; - if (new_size) { + if (new_size) private_data_size += page_size - new_size; - } } /* try to allocate tailq entry */ @@ -544,7 +548,12 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, * store mempool objects. Otherwise reserve a memzone that is large * enough to hold mempool header and metadata plus mempool objects. */ +#ifdef RTE_NEXT_ABI + mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size); + mempool_size += private_data_size; +#else mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; +#endif /* RTE_NEXT_ABI */ mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN); if (vaddr == NULL) mempool_size += (size_t)objsz.total_size * n; @@ -598,9 +607,22 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size); mp->private_data_size = private_data_size; +#ifdef RTE_NEXT_ABI + /* + * local_cache pointer is set even if cache_size is zero. + * The local_cache points to just past the elt_pa[] array. + */ + mp->local_cache = (struct rte_mempool_cache *) + ((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0)); + + /* calculate address of the first element for continuous mempool. 
*/ + obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) + + private_data_size; +#else /* calculate address of the first element for continuous mempool. */ obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; +#endif /* RTE_NEXT_ABI */ obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN); /* populate address translation fields. */ @@ -613,9 +635,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, mp->elt_va_start = (uintptr_t)obj; mp->elt_pa[0] = mp->phys_addr + (mp->elt_va_start - (uintptr_t)mp); - - /* mempool elements in a separate chunk of memory. */ } else { + /* mempool elements in a separate chunk of memory. */ mp->elt_va_start = (uintptr_t)vaddr; memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num); } @@ -645,10 +666,21 @@ unsigned rte_mempool_count(const struct rte_mempool *mp) { unsigned count; +#ifdef RTE_NEXT_ABI + unsigned lcore_id; count = rte_ring_count(mp->ring); + if (mp->cache_size == 0) + return count; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + count += mp->local_cache[lcore_id].len; +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + + count = rte_ring_count(mp->ring); + { unsigned lcore_id; if (mp->cache_size == 0) @@ -658,7 +690,7 @@ rte_mempool_count(const struct rte_mempool *mp) count += mp->local_cache[lcore_id].len; } #endif - +#endif /* RTE_NEXT_ABI */ /* * due to race condition (access to len is not locked), the * total can be greater than size... 
so fix the result @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp) static unsigned rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) { +#ifdef RTE_NEXT_ABI + unsigned lcore_id; + unsigned count = 0; + unsigned cache_count; + + fprintf(f, " cache infos:\n"); + fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size); + if (mp->cache_size == 0) + return count; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + cache_count = mp->local_cache[lcore_id].len; + fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count); + count += cache_count; + } + fprintf(f, " total_cache_count=%u\n", count); + return count; +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 unsigned lcore_id; unsigned count = 0; @@ -691,6 +741,7 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) fprintf(f, " cache disabled\n"); return 0; #endif +#endif /* RTE_NEXT_ABI */ } #ifdef RTE_LIBRTE_MEMPOOL_DEBUG @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp) #define mempool_audit_cookies(mp) do {} while(0) #endif +#ifdef RTE_NEXT_ABI +/* check cookies before and after objects */ +static void +mempool_audit_cache(const struct rte_mempool *mp) +{ + /* check cache size consistency */ + unsigned lcore_id; + + if (mp->cache_size == 0) + return; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) { + RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n", + lcore_id); + rte_panic("MEMPOOL: invalid cache len\n"); + } + } +} +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /* check cookies before and after objects */ static void @@ -773,7 +844,7 @@ mempool_audit_cache(const struct rte_mempool *mp) #else #define mempool_audit_cache(mp) do {} while(0) #endif - +#endif /* RTE_NEXT_ABI */ /* check the consistency of mempool (size, cookies, ...) 
*/ void diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 6e2390a..fc9b595 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats { } __rte_cache_aligned; #endif +#ifdef RTE_NEXT_ABI +/** + * A structure that stores a per-core object cache. + */ +struct rte_mempool_cache { + unsigned len; /**< Cache len */ + /* + * Cache is allocated to this size to allow it to overflow in certain + * cases to avoid needless emptying of cache. + */ + void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */ +} __rte_cache_aligned; +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /** * A structure that stores a per-core object cache. @@ -108,6 +121,7 @@ struct rte_mempool_cache { void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */ } __rte_cache_aligned; #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ +#endif /* RTE_NEXT_ABI */ /** * A structure that stores the size of mempool elements. @@ -194,10 +208,14 @@ struct rte_mempool { unsigned private_data_size; /**< Size of private data. */ +#ifdef RTE_NEXT_ABI + struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */ +#else #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 /** Per-lcore local cache. */ struct rte_mempool_cache local_cache[RTE_MAX_LCORE]; #endif +#endif /* RTE_NEXT_ABI */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG /** Per-lcore statistics. */ @@ -246,6 +264,26 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) #endif +#ifdef RTE_NEXT_ABI +/** + * Size of elt_pa array size based on number of pages. (Internal use) + */ +#define __PA_SIZE(mp, pgn) \ + RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \ + sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE) + +/** + * Calculate the size of the mempool header. + * + * @param mp + * Pointer to the memory pool. + * @param pgn + * Number of pages used to store mempool objects. 
+ */ +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \ + (sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \ + (sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE))) +#else /** * Calculate the size of the mempool header. * @@ -257,7 +295,7 @@ struct rte_mempool { #define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \ RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \ sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE)) - +#endif /* RTE_NEXT_ABI */ /** * Return true if the whole mempool is in contiguous memory. */ @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline)) __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, unsigned n, int is_mp) { +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +#endif /* RTE_NEXT_ABI */ struct rte_mempool_cache *cache; uint32_t index; void **cache_objs; unsigned lcore_id = rte_lcore_id(); uint32_t cache_size = mp->cache_size; uint32_t flushthresh = mp->cache_flushthresh; +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ +#endif /* RTE_NEXT_ABI */ /* increment stat now, adding in mempool always success */ __MEMPOOL_STAT_ADD(mp, put, n); +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +#endif /* RTE_NEXT_ABI */ /* cache is not enabled or single producer or non-EAL thread */ if (unlikely(cache_size == 0 || is_mp == 0 || lcore_id >= RTE_MAX_LCORE)) @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, return; ring_enqueue: +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ +#endif /* RTE_NEXT_ABI */ /* push remaining objects in ring */ #ifdef RTE_LIBRTE_MEMPOOL_DEBUG @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n, int is_mc) { int ret; +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +#endif /* RTE_NEXT_ABI */ struct rte_mempool_cache *cache; uint32_t index, len; void **cache_objs; @@ 
-992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table, return 0; ring_dequeue: +#ifndef RTE_NEXT_ABI /* Note: ifndef */ #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ +#endif /* RTE_NEXT_ABI */ /* get remaining objects from ring */ if (is_mc) @@ -1293,7 +1343,12 @@ void rte_mempool_audit(const struct rte_mempool *mp); */ static inline void *rte_mempool_get_priv(struct rte_mempool *mp) { +#ifdef RTE_NEXT_ABI + return (char *)mp + + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size); +#else return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num); +#endif /* RTE_NEXT_ABI */ } /**