From patchwork Tue Aug 22 00:19:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wang, Yipeng1" X-Patchwork-Id: 27706 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 1F2267D3F; Tue, 22 Aug 2017 02:21:08 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id C2D7E7D17 for ; Tue, 22 Aug 2017 02:21:05 +0200 (CEST) Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Aug 2017 17:21:05 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.41,410,1498546800"; d="scan'208";a="302858616" Received: from bdw-yipeng.jf.intel.com ([10.54.81.30]) by fmsmga004.fm.intel.com with ESMTP; 21 Aug 2017 17:21:04 -0700 From: Yipeng Wang To: vincent.jardin@6wind.com, stephen@networkplumber.org, bruce.richardson@intel.com, konstantin.ananyev@intel.com, thomas@monjalon.net Cc: dev@dpdk.org, yipeng1.wang@intel.com, charlie.tai@intel.com, sameh.gobriel@intel.com, ren.wang@intel.com Date: Mon, 21 Aug 2017 17:19:50 -0700 Message-Id: <1503361193-36699-5-git-send-email-yipeng1.wang@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1503361193-36699-1-git-send-email-yipeng1.wang@intel.com> References: <1503361193-36699-1-git-send-email-yipeng1.wang@intel.com> Subject: [dpdk-dev] [PATCH 4/7] member: add AVX for HT mode X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" For key search, the signatures of all entries are compared against the signature of the key that is being looked up. Since all signatures are contguously put in a bucket, they can be compared with vector instructions (AVX2), achieving higher lookup performance. This patch adds AVX2 implementation in a separate header file. Signed-off-by: Yipeng Wang --- lib/librte_member/rte_member_ht.c | 143 ++++++++++++++++++++++++++++--------- lib/librte_member/rte_member_x86.h | 111 ++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 32 deletions(-) create mode 100644 lib/librte_member/rte_member_x86.h diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c index 3e411ed..2f52220 100644 --- a/lib/librte_member/rte_member_ht.c +++ b/lib/librte_member/rte_member_ht.c @@ -40,6 +40,10 @@ #include "rte_member.h" #include "rte_member_ht.h" +#if defined(RTE_ARCH_X86) +#include "rte_member_x86.h" +#endif + static inline int insert_overwrite_search(uint32_t bucket, SIG_TYPE tmp_sig, @@ -133,6 +137,13 @@ rte_member_create_ht(struct rte_member_setsum *ss, for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) buckets[i].sets[j] = RTE_MEMBER_NO_MATCH; } +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) && + RTE_MEMBER_BUCKET_ENTRIES == 16) + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2; + else +#endif + ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR; RTE_LOG(DEBUG, MEMBER, "Hash table based filter created, " @@ -172,11 +183,23 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss, *set_id = RTE_MEMBER_NO_MATCH; get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - if (search_bucket_single(prim_bucket, tmp_sig, buckets, - set_id) || - search_bucket_single(sec_bucket, tmp_sig, - buckets, set_id)) - return 1; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single_avx(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + break; +#endif + default: + if (search_bucket_single(prim_bucket, tmp_sig, buckets, + set_id) || + search_bucket_single(sec_bucket, tmp_sig, + buckets, set_id)) + return 1; + } return 0; } @@ -201,13 +224,27 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss, } for (i = 0; i < num_keys; i++) { - if (search_bucket_single(prim_buckets[i], tmp_sig[i], - buckets, &set_id[i]) || - search_bucket_single(sec_buckets[i], - tmp_sig[i], buckets, &set_id[i])) - ret++; - else - set_id[i] = RTE_MEMBER_NO_MATCH; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (search_bucket_single_avx(prim_buckets[i], + tmp_sig[i], buckets, &set_id[i]) || + search_bucket_single_avx(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + ret++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + break; +#endif + default: + if (search_bucket_single(prim_buckets[i], tmp_sig[i], + buckets, &set_id[i]) || + search_bucket_single(sec_buckets[i], + tmp_sig[i], buckets, &set_id[i])) + ret++; + else + set_id[i] = RTE_MEMBER_NO_MATCH; + } } return ret; } @@ -225,12 +262,24 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss, get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig); - search_bucket_multi(prim_bucket, tmp_sig, buckets, &ret, - match_per_key, set_id); - if (ret < match_per_key) - search_bucket_multi(sec_bucket, tmp_sig, - buckets, &ret, match_per_key, set_id); - return ret; + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_bucket, tmp_sig, buckets, + &ret, match_per_key, set_id); + if (ret < match_per_key) + search_bucket_multi_avx(sec_bucket, tmp_sig, + buckets, &ret, match_per_key, set_id); + return ret; +#endif + default: + search_bucket_multi(prim_bucket, tmp_sig, buckets, &ret, + match_per_key, set_id); + if (ret < match_per_key) + search_bucket_multi(sec_bucket, tmp_sig, + buckets, &ret, match_per_key, set_id); + return ret; + } } @@ -257,16 +306,34 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss, for (i = 0; i < num_keys; i++) { match_cnt_t = 0; - search_bucket_multi(prim_buckets[i], tmp_sig[i], - buckets, &match_cnt_t, match_per_key, - &set_ids[i*match_per_key]); - if (match_cnt_t < match_per_key) - search_bucket_multi(sec_buckets[i], tmp_sig[i], + switch (ss->sig_cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + search_bucket_multi_avx(prim_buckets[i], tmp_sig[i], buckets, &match_cnt_t, match_per_key, &set_ids[i*match_per_key]); - match_count[i] = match_cnt_t; - if (match_cnt_t != 0) - ret++; + if (match_cnt_t < match_per_key) + search_bucket_multi_avx(sec_buckets[i], + tmp_sig[i], buckets, &match_cnt_t, + match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + ret++; + break; +#endif + default: + search_bucket_multi(prim_buckets[i], tmp_sig[i], + buckets, &match_cnt_t, match_per_key, + &set_ids[i*match_per_key]); + if (match_cnt_t < match_per_key) + search_bucket_multi(sec_buckets[i], tmp_sig[i], + buckets, &match_cnt_t, match_per_key, + &set_ids[i*match_per_key]); + match_count[i] = match_cnt_t; + if (match_cnt_t != 0) + ret++; + } } return ret; } @@ -298,12 +365,24 @@ try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, static inline int try_overwrite(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec, - SIG_TYPE sig, MEMBER_SET_TYPE set_id) + SIG_TYPE sig, MEMBER_SET_TYPE set_id, + enum rte_member_sig_compare_function cmp_fn) { - if (insert_overwrite_search(prim, sig, buckets, set_id) || - insert_overwrite_search(sec, sig, buckets, - set_id)) - return 0; + switch (cmp_fn) { +#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2) + case RTE_MEMBER_COMPARE_AVX2: + if (insert_overwrite_search_avx(prim, sig, buckets, set_id) || + insert_overwrite_search_avx(sec, sig, buckets, + set_id)) + return 0; + break; +#endif + default: + if (insert_overwrite_search(prim, sig, buckets, set_id) || + insert_overwrite_search(sec, sig, buckets, + set_id)) + return 0; + } return -1; } @@ -409,7 +488,7 @@ rte_member_add_ht(const struct rte_member_setsum *ss, /* if it is cache based filter, we try overwriting existing entry */ if (ss->cache) { ret = try_overwrite(buckets, prim_bucket, sec_bucket, tmp_sig, - set_id); + set_id, ss->sig_cmp_fn); if (ret != -1) return ret; } diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h new file mode 100644 index 0000000..c55f128 --- /dev/null +++ b/lib/librte_member/rte_member_x86.h @@ -0,0 +1,111 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMBER_X86_H_ +#define _RTE_MEMBER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + + +#if defined(RTE_MACHINE_CPUFLAG_AVX2) + + +static inline int +insert_overwrite_search_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + MEMBER_SET_TYPE set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + if (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + buckets[bucket].sets[hit_idx] = set_id; + return 1; + } + return 0; +} + + +static inline int +search_bucket_single_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + MEMBER_SET_TYPE *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + if (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + *set_id = buckets[bucket].sets[hit_idx]; + return 1; + } + hitmask &= ~(3U << (hit_idx) * 2); + } + return 0; +} + +static inline void +search_bucket_multi_avx(uint32_t bucket, SIG_TYPE tmp_sig, + struct member_ht_bucket *buckets, + uint32_t *counter, + uint32_t match_per_key, + MEMBER_SET_TYPE *set_id) +{ + uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16( + _mm256_load_si256((__m256i const *)buckets[bucket].sigs), + _mm256_set1_epi16(tmp_sig))); + while (hitmask) { + uint32_t hit_idx = __builtin_ctzl(hitmask) / 2; + if (buckets[bucket].sets[hit_idx] != RTE_MEMBER_NO_MATCH) { + set_id[*counter] = buckets[bucket].sets[hit_idx]; + (*counter)++; + if (*counter >= match_per_key) + return; + } + hitmask &= ~(3U << (hit_idx) * 2); + } +} +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMBER_X86_H_ */