From patchwork Fri Nov 1 15:21:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vladimir Medvedkin X-Patchwork-Id: 62323 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 767DAA00BE; Fri, 1 Nov 2019 16:23:34 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id A01981E886; Fri, 1 Nov 2019 16:22:09 +0100 (CET) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id D37EA1E86E for ; Fri, 1 Nov 2019 16:22:06 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 01 Nov 2019 08:22:06 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.68,256,1569308400"; d="scan'208";a="351979131" Received: from silpixa00400072.ir.intel.com ([10.237.222.213]) by orsmga004.jf.intel.com with ESMTP; 01 Nov 2019 08:22:04 -0700 From: Vladimir Medvedkin To: dev@dpdk.org Cc: bruce.richardson@intel.com, konstantin.ananyev@intel.com, thomas@monjalon.net, aconole@redhat.com Date: Fri, 1 Nov 2019 15:21:41 +0000 Message-Id: X-Mailer: git-send-email 2.7.4 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH v6 08/12] fib: add dataplane algorithm for ipv6 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add fib implementation for ipv6 using modified DIR24_8 algorithm. Implementation is similar to current LPM6 implementation but has few enhancements: faster control plabe operations more bits for userdata in table entries configurable userdata size Signed-off-by: Vladimir Medvedkin --- lib/librte_fib/Makefile | 2 +- lib/librte_fib/meson.build | 2 +- lib/librte_fib/rte_fib6.c | 14 + lib/librte_fib/rte_fib6.h | 14 + lib/librte_fib/trie.c | 760 +++++++++++++++++++++++++++++++++++++++++++++ lib/librte_fib/trie.h | 37 +++ 6 files changed, 827 insertions(+), 2 deletions(-) create mode 100644 lib/librte_fib/trie.c create mode 100644 lib/librte_fib/trie.h diff --git a/lib/librte_fib/Makefile b/lib/librte_fib/Makefile index 67fde9a..4abd80b 100644 --- a/lib/librte_fib/Makefile +++ b/lib/librte_fib/Makefile @@ -17,7 +17,7 @@ EXPORT_MAP := rte_fib_version.map LIBABIVER := 1 # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_FIB) := rte_fib.c rte_fib6.c dir24_8.c +SRCS-$(CONFIG_RTE_LIBRTE_FIB) := rte_fib.c rte_fib6.c dir24_8.c trie.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_FIB)-include := rte_fib.h rte_fib6.h diff --git a/lib/librte_fib/meson.build b/lib/librte_fib/meson.build index c63cac8..e2c6f44 100644 --- a/lib/librte_fib/meson.build +++ b/lib/librte_fib/meson.build @@ -3,6 +3,6 @@ # Copyright(c) 2019 Intel Corporation allow_experimental_apis = true -sources = files('rte_fib.c', 'rte_fib6.c', 'dir24_8.c') +sources = files('rte_fib.c', 'rte_fib6.c', 'dir24_8.c', 'trie.c') headers = files('rte_fib.h', 'rte_fib6.h') deps += ['rib'] diff --git a/lib/librte_fib/rte_fib6.c b/lib/librte_fib/rte_fib6.c index 9f00a80..354227d 100644 --- a/lib/librte_fib/rte_fib6.c +++ b/lib/librte_fib/rte_fib6.c @@ -17,6 +17,8 @@ #include #include +#include "trie.h" + TAILQ_HEAD(rte_fib6_list, rte_tailq_entry); static struct rte_tailq_elem rte_fib6_tailq = { .name = "RTE_FIB6", @@ -92,12 +94,22 @@ static int init_dataplane(struct rte_fib6 *fib, __rte_unused int socket_id, struct rte_fib6_conf *conf) { + char dp_name[sizeof(void *)]; + + snprintf(dp_name, sizeof(dp_name), "%p", fib); switch (conf->type) { case RTE_FIB6_DUMMY: fib->dp = fib; fib->lookup = dummy_lookup; fib->modify = dummy_modify; return 0; + case RTE_FIB6_TRIE: + fib->dp = trie_create(dp_name, socket_id, conf); + if (fib->dp == NULL) + return -rte_errno; + fib->lookup = rte_trie_get_lookup_fn(conf); + fib->modify = trie_modify; + return 0; default: return -EINVAL; } @@ -260,6 +272,8 @@ free_dataplane(struct rte_fib6 *fib) switch (fib->type) { case RTE_FIB6_DUMMY: return; + case RTE_FIB6_TRIE: + trie_free(fib->dp); default: return; } diff --git a/lib/librte_fib/rte_fib6.h b/lib/librte_fib/rte_fib6.h index 3322123..4268704 100644 --- a/lib/librte_fib/rte_fib6.h +++ b/lib/librte_fib/rte_fib6.h @@ -19,10 +19,12 @@ #define RTE_FIB6_MAXDEPTH 128 struct rte_fib6; +struct rte_rib6; /** Type of FIB struct */ enum rte_fib6_type { RTE_FIB6_DUMMY, /**< RIB6 tree based FIB */ + RTE_FIB6_TRIE, /**< TRIE based fib */ RTE_FIB6_TYPE_MAX }; @@ -40,12 +42,24 @@ enum rte_fib6_op { RTE_FIB6_DEL, }; +enum rte_fib_trie_nh_sz { + RTE_FIB6_TRIE_2B = 1, + RTE_FIB6_TRIE_4B, + RTE_FIB6_TRIE_8B +}; + /** FIB configuration structure */ struct rte_fib6_conf { enum rte_fib6_type type; /**< Type of FIB struct */ /** Default value returned on lookup if there is no route */ uint64_t default_nh; int max_routes; + union { + struct { + enum rte_fib_trie_nh_sz nh_sz; + uint32_t num_tbl8; + } trie; + }; }; /** diff --git a/lib/librte_fib/trie.c b/lib/librte_fib/trie.c new file mode 100644 index 0000000..198e815 --- /dev/null +++ b/lib/librte_fib/trie.c @@ -0,0 +1,760 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Vladimir Medvedkin + * Copyright(c) 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "trie.h" + +/* @internal Total number of tbl24 entries. */ +#define TRIE_TBL24_NUM_ENT (1 << 24) + +/* Maximum depth value possible for IPv6 LPM. */ +#define TRIE_MAX_DEPTH 128 + +/* @internal Number of entries in a tbl8 group. */ +#define TRIE_TBL8_GRP_NUM_ENT 256ULL + +/* @internal Total number of tbl8 groups in the tbl8. */ +#define TRIE_TBL8_NUM_GROUPS 65536 + +/* @internal bitmask with valid and valid_group fields set */ +#define TRIE_EXT_ENT 1 + +#define TRIE_NAMESIZE 64 + +#define BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define BITMAP_SLAB_BIT_SIZE (1ULL << BITMAP_SLAB_BIT_SIZE_LOG2) +#define BITMAP_SLAB_BITMASK (BITMAP_SLAB_BIT_SIZE - 1) + +struct rte_trie_tbl { + uint32_t number_tbl8s; /**< Total number of tbl8s */ + uint32_t rsvd_tbl8s; /**< Number of reserved tbl8s */ + uint32_t cur_tbl8s; /**< Current cumber of tbl8s */ + uint64_t def_nh; /**< Default next hop */ + enum rte_fib_trie_nh_sz nh_sz; /**< Size of nexthop entry */ + uint64_t *tbl8; /**< tbl8 table. */ + uint32_t *tbl8_pool; /**< bitmap containing free tbl8 idxes*/ + uint32_t tbl8_pool_pos; + /* tbl24 table. */ + __extension__ uint64_t tbl24[0] __rte_cache_aligned; +}; + +enum edge { + LEDGE, + REDGE +}; + +enum lookup_type { + MACRO, + INLINE, + UNI +}; +static enum lookup_type test_lookup = MACRO; + +static inline uint32_t +get_tbl24_idx(const uint8_t *ip) +{ + return ip[0] << 16|ip[1] << 8|ip[2]; +} + +static inline void * +get_tbl24_p(struct rte_trie_tbl *dp, const uint8_t *ip, uint8_t nh_sz) +{ + uint32_t tbl24_idx; + + tbl24_idx = get_tbl24_idx(ip); + return (void *)&((uint8_t *)dp->tbl24)[tbl24_idx << nh_sz]; +} + +static inline uint8_t +bits_in_nh(uint8_t nh_sz) +{ + return 8 * (1 << nh_sz); +} + +static inline uint64_t +get_max_nh(uint8_t nh_sz) +{ + return ((1ULL << (bits_in_nh(nh_sz) - 1)) - 1); +} + +static inline uint64_t +lookup_msk(uint8_t nh_sz) +{ + return ((1ULL << ((1 << (nh_sz + 3)) - 1)) << 1) - 1; +} + +static inline uint8_t +get_psd_idx(uint32_t val, uint8_t nh_sz) +{ + return val & ((1 << (3 - nh_sz)) - 1); +} + +static inline uint32_t +get_tbl_pos(uint32_t val, uint8_t nh_sz) +{ + return val >> (3 - nh_sz); +} + +static inline uint64_t +get_tbl_val_by_idx(uint64_t *tbl, uint32_t idx, uint8_t nh_sz) +{ + return ((tbl[get_tbl_pos(idx, nh_sz)] >> (get_psd_idx(idx, nh_sz) * + bits_in_nh(nh_sz))) & lookup_msk(nh_sz)); +} + +static inline void * +get_tbl_p_by_idx(uint64_t *tbl, uint64_t idx, uint8_t nh_sz) +{ + return (uint8_t *)tbl + (idx << nh_sz); +} + +static inline int +is_entry_extended(uint64_t ent) +{ + return (ent & TRIE_EXT_ENT) == TRIE_EXT_ENT; +} + +#define LOOKUP_FUNC(suffix, type, nh_sz) \ +static void rte_trie_lookup_bulk_##suffix(void *p, \ + uint8_t ips[][RTE_FIB6_IPV6_ADDR_SIZE], \ + uint64_t *next_hops, const unsigned int n) \ +{ \ + struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p; \ + uint64_t tmp; \ + uint32_t i, j; \ + \ + for (i = 0; i < n; i++) { \ + tmp = ((type *)dp->tbl24)[get_tbl24_idx(&ips[i][0])]; \ + j = 3; \ + while (is_entry_extended(tmp)) { \ + tmp = ((type *)dp->tbl8)[ips[i][j++] + \ + ((tmp >> 1) * TRIE_TBL8_GRP_NUM_ENT)]; \ + } \ + next_hops[i] = tmp >> 1; \ + } \ +} +LOOKUP_FUNC(2b, uint16_t, 1) +LOOKUP_FUNC(4b, uint32_t, 2) +LOOKUP_FUNC(8b, uint64_t, 3) + +rte_fib6_lookup_fn_t +rte_trie_get_lookup_fn(struct rte_fib6_conf *conf) +{ + enum rte_fib_trie_nh_sz nh_sz = conf->trie.nh_sz; + + if (test_lookup == MACRO) { + switch (nh_sz) { + case RTE_FIB6_TRIE_2B: + return rte_trie_lookup_bulk_2b; + case RTE_FIB6_TRIE_4B: + return rte_trie_lookup_bulk_4b; + case RTE_FIB6_TRIE_8B: + return rte_trie_lookup_bulk_8b; + } + } + + return NULL; +} + +static void +write_to_dp(void *ptr, uint64_t val, enum rte_fib_trie_nh_sz size, int n) +{ + int i; + uint16_t *ptr16 = (uint16_t *)ptr; + uint32_t *ptr32 = (uint32_t *)ptr; + uint64_t *ptr64 = (uint64_t *)ptr; + + switch (size) { + case RTE_FIB6_TRIE_2B: + for (i = 0; i < n; i++) + ptr16[i] = (uint16_t)val; + break; + case RTE_FIB6_TRIE_4B: + for (i = 0; i < n; i++) + ptr32[i] = (uint32_t)val; + break; + case RTE_FIB6_TRIE_8B: + for (i = 0; i < n; i++) + ptr64[i] = (uint64_t)val; + break; + } +} + +static void +tbl8_pool_init(struct rte_trie_tbl *dp) +{ + uint32_t i; + + /* put entire range of indexes to the tbl8 pool */ + for (i = 0; i < dp->number_tbl8s; i++) + dp->tbl8_pool[i] = i; + + dp->tbl8_pool_pos = 0; +} + +/* + * Get an index of a free tbl8 from the pool + */ +static inline int32_t +tbl8_get(struct rte_trie_tbl *dp) +{ + if (dp->tbl8_pool_pos == dp->number_tbl8s) + /* no more free tbl8 */ + return -ENOSPC; + + /* next index */ + return dp->tbl8_pool[dp->tbl8_pool_pos++]; +} + +/* + * Put an index of a free tbl8 back to the pool + */ +static inline void +tbl8_put(struct rte_trie_tbl *dp, uint32_t tbl8_ind) +{ + dp->tbl8_pool[--dp->tbl8_pool_pos] = tbl8_ind; +} + +static int +tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh) +{ + int64_t tbl8_idx; + uint8_t *tbl8_ptr; + + tbl8_idx = tbl8_get(dp); + if (tbl8_idx < 0) + return tbl8_idx; + tbl8_ptr = (uint8_t *)dp->tbl8 + + ((tbl8_idx * TRIE_TBL8_GRP_NUM_ENT) << + dp->nh_sz); + /*Init tbl8 entries with nexthop from tbl24*/ + write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz, + TRIE_TBL8_GRP_NUM_ENT); + return tbl8_idx; +} + +static void +tbl8_recycle(struct rte_trie_tbl *dp, void *par, uint64_t tbl8_idx) +{ + uint32_t i; + uint64_t nh; + uint16_t *ptr16; + uint32_t *ptr32; + uint64_t *ptr64; + + switch (dp->nh_sz) { + case RTE_FIB6_TRIE_2B: + ptr16 = &((uint16_t *)dp->tbl8)[tbl8_idx * + TRIE_TBL8_GRP_NUM_ENT]; + nh = *ptr16; + if (nh & TRIE_EXT_ENT) + return; + for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) { + if (nh != ptr16[i]) + return; + } + write_to_dp(par, nh, dp->nh_sz, 1); + for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++) + ptr16[i] = 0; + break; + case RTE_FIB6_TRIE_4B: + ptr32 = &((uint32_t *)dp->tbl8)[tbl8_idx * + TRIE_TBL8_GRP_NUM_ENT]; + nh = *ptr32; + if (nh & TRIE_EXT_ENT) + return; + for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) { + if (nh != ptr32[i]) + return; + } + write_to_dp(par, nh, dp->nh_sz, 1); + for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++) + ptr32[i] = 0; + break; + case RTE_FIB6_TRIE_8B: + ptr64 = &((uint64_t *)dp->tbl8)[tbl8_idx * + TRIE_TBL8_GRP_NUM_ENT]; + nh = *ptr64; + if (nh & TRIE_EXT_ENT) + return; + for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) { + if (nh != ptr64[i]) + return; + } + write_to_dp(par, nh, dp->nh_sz, 1); + for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++) + ptr64[i] = 0; + break; + } + tbl8_put(dp, tbl8_idx); +} + +#define BYTE_SIZE 8 +static inline uint32_t +get_idx(const uint8_t *ip, uint32_t prev_idx, int bytes, int first_byte) +{ + int i; + uint32_t idx = 0; + uint8_t bitshift; + + for (i = first_byte; i < (first_byte + bytes); i++) { + bitshift = (int8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE); + idx |= ip[i] << bitshift; + } + return (prev_idx * 256) + idx; +} + +static inline uint64_t +get_val_by_p(void *p, uint8_t nh_sz) +{ + uint64_t val = 0; + + switch (nh_sz) { + case RTE_FIB6_TRIE_2B: + val = *(uint16_t *)p; + break; + case RTE_FIB6_TRIE_4B: + val = *(uint32_t *)p; + break; + case RTE_FIB6_TRIE_8B: + val = *(uint64_t *)p; + break; + } + return val; +} + +/* + * recursively recycle tbl8's + */ +static void +recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part, + uint8_t common_tbl8, void *prev) +{ + void *p; + uint64_t val; + + val = get_val_by_p(prev, dp->nh_sz); + if (unlikely((val & TRIE_EXT_ENT) != TRIE_EXT_ENT)) + return; + + if (common_tbl8 != 0) { + p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) * 256 + *ip_part, + dp->nh_sz); + recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p); + } + tbl8_recycle(dp, prev, val >> 1); +} + +static inline int +build_common_root(struct rte_trie_tbl *dp, const uint8_t *ip, + int common_bytes, void **tbl) +{ + void *tbl_ptr = NULL; + uint64_t *cur_tbl; + uint64_t val; + int i, j, idx, prev_idx = 0; + + cur_tbl = dp->tbl24; + for (i = 3, j = 0; i <= common_bytes; i++) { + idx = get_idx(ip, prev_idx, i - j, j); + val = get_tbl_val_by_idx(cur_tbl, idx, dp->nh_sz); + tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx, dp->nh_sz); + if ((val & TRIE_EXT_ENT) != TRIE_EXT_ENT) { + idx = tbl8_alloc(dp, val); + if (unlikely(idx < 0)) + return idx; + write_to_dp(tbl_ptr, (idx << 1) | + TRIE_EXT_ENT, dp->nh_sz, 1); + prev_idx = idx; + } else + prev_idx = val >> 1; + + j = i; + cur_tbl = dp->tbl8; + } + *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * 256, dp->nh_sz); + return 0; +} + +static int +write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop, + int len, enum edge edge, void *ent) +{ + uint64_t val = next_hop << 1; + int tbl8_idx; + int ret = 0; + void *p; + + if (len != 0) { + val = get_val_by_p(ent, dp->nh_sz); + if ((val & TRIE_EXT_ENT) == TRIE_EXT_ENT) + tbl8_idx = val >> 1; + else { + tbl8_idx = tbl8_alloc(dp, val); + if (tbl8_idx < 0) + return tbl8_idx; + val = (tbl8_idx << 1)|TRIE_EXT_ENT; + } + p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx * 256) + *ip_part, + dp->nh_sz); + ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p); + if (ret < 0) + return ret; + if (edge == LEDGE) { + write_to_dp((uint8_t *)p + (1 << dp->nh_sz), + next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part); + } else { + write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx * 256, + dp->nh_sz), + next_hop << 1, dp->nh_sz, *ip_part); + } + tbl8_recycle(dp, &val, tbl8_idx); + } + + write_to_dp(ent, val, dp->nh_sz, 1); + return ret; +} + +#define IPV6_MAX_IDX (RTE_FIB6_IPV6_ADDR_SIZE - 1) +#define TBL24_BYTES 3 +#define TBL8_LEN (RTE_FIB6_IPV6_ADDR_SIZE - TBL24_BYTES) + +static int +install_to_dp(struct rte_trie_tbl *dp, const uint8_t *ledge, const uint8_t *r, + uint64_t next_hop) +{ + void *common_root_tbl; + void *ent; + int ret; + int i; + int common_bytes; + int llen, rlen; + uint8_t redge[16]; + + /* decrement redge by 1*/ + rte_rib6_copy_addr(redge, r); + for (i = 15; i >= 0; i--) { + redge[i]--; + if (redge[i] != 0xff) + break; + } + + for (common_bytes = 0; common_bytes < 15; common_bytes++) { + if (ledge[common_bytes] != redge[common_bytes]) + break; + } + + ret = build_common_root(dp, ledge, common_bytes, &common_root_tbl); + if (unlikely(ret != 0)) + return ret; + /*first uncommon tbl8 byte idx*/ + uint8_t first_tbl8_byte = RTE_MAX(common_bytes, TBL24_BYTES); + + for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) { + if (ledge[i] != 0) + break; + } + + llen = i - first_tbl8_byte + (common_bytes < 3); + + for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) { + if (redge[i] != UINT8_MAX) + break; + } + rlen = i - first_tbl8_byte + (common_bytes < 3); + + /*first noncommon byte*/ + uint8_t first_byte_idx = (common_bytes < 3) ? 0 : common_bytes; + uint8_t first_idx_len = (common_bytes < 3) ? 3 : 1; + + uint32_t left_idx = get_idx(ledge, 0, first_idx_len, first_byte_idx); + uint32_t right_idx = get_idx(redge, 0, first_idx_len, first_byte_idx); + + ent = get_tbl_p_by_idx(common_root_tbl, left_idx, dp->nh_sz); + ret = write_edge(dp, &ledge[first_tbl8_byte + !(common_bytes < 3)], + next_hop, llen, LEDGE, ent); + if (ret < 0) + return ret; + + if (right_idx > left_idx + 1) { + ent = get_tbl_p_by_idx(common_root_tbl, left_idx + 1, + dp->nh_sz); + write_to_dp(ent, next_hop << 1, dp->nh_sz, + right_idx - (left_idx + 1)); + } + ent = get_tbl_p_by_idx(common_root_tbl, right_idx, dp->nh_sz); + ret = write_edge(dp, &redge[first_tbl8_byte + !((common_bytes < 3))], + next_hop, rlen, REDGE, ent); + if (ret < 0) + return ret; + + uint8_t common_tbl8 = (common_bytes < TBL24_BYTES) ? + 0 : common_bytes - (TBL24_BYTES - 1); + ent = get_tbl24_p(dp, ledge, dp->nh_sz); + recycle_root_path(dp, ledge + TBL24_BYTES, common_tbl8, ent); + return 0; +} + +static void +get_nxt_net(uint8_t *ip, uint8_t depth) +{ + int i; + uint8_t part_depth; + uint8_t prev_byte; + + for (i = 0, part_depth = depth; part_depth > 8; part_depth -= 8, i++) + ; + + prev_byte = ip[i]; + ip[i] += 1 << (8 - part_depth); + if (ip[i] < prev_byte) { + while (i > 0) { + ip[--i] += 1; + if (ip[i] != 0) + break; + } + } +} + +static int +modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib, + const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE], + uint8_t depth, uint64_t next_hop) +{ + struct rte_rib6_node *tmp = NULL; + uint8_t ledge[RTE_FIB6_IPV6_ADDR_SIZE]; + uint8_t redge[RTE_FIB6_IPV6_ADDR_SIZE]; + int ret; + uint8_t tmp_depth; + + if (next_hop > get_max_nh(dp->nh_sz)) + return -EINVAL; + + rte_rib6_copy_addr(ledge, ip); + do { + tmp = rte_rib6_get_nxt(rib, ip, depth, tmp, + RTE_RIB6_GET_NXT_COVER); + if (tmp != NULL) { + rte_rib6_get_depth(tmp, &tmp_depth); + if (tmp_depth == depth) + continue; + rte_rib6_get_ip(tmp, redge); + if (rte_rib6_is_equal(ledge, redge)) { + get_nxt_net(ledge, tmp_depth); + continue; + } + ret = install_to_dp(dp, ledge, redge, + next_hop); + if (ret != 0) + return ret; + get_nxt_net(redge, tmp_depth); + rte_rib6_copy_addr(ledge, redge); + } else { + rte_rib6_copy_addr(redge, ip); + get_nxt_net(redge, depth); + if (rte_rib6_is_equal(ledge, redge)) + break; + ret = install_to_dp(dp, ledge, redge, + next_hop); + if (ret != 0) + return ret; + } + } while (tmp); + + return 0; +} + +int +trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE], + uint8_t depth, uint64_t next_hop, int op) +{ + struct rte_trie_tbl *dp; + struct rte_rib6 *rib; + struct rte_rib6_node *tmp = NULL; + struct rte_rib6_node *node; + struct rte_rib6_node *parent; + uint8_t ip_masked[RTE_FIB6_IPV6_ADDR_SIZE]; + int i, ret = 0; + uint64_t par_nh, node_nh; + uint8_t tmp_depth, depth_diff = 0, parent_depth = 24; + + if ((fib == NULL) || (ip == NULL) || (depth > RTE_FIB6_MAXDEPTH)) + return -EINVAL; + + dp = rte_fib6_get_dp(fib); + RTE_ASSERT(dp); + rib = rte_fib6_get_rib(fib); + RTE_ASSERT(rib); + + for (i = 0; i < RTE_FIB6_IPV6_ADDR_SIZE; i++) + ip_masked[i] = ip[i] & get_msk_part(depth, i); + + if (depth > 24) { + tmp = rte_rib6_get_nxt(rib, ip_masked, + RTE_ALIGN_FLOOR(depth, 8), NULL, + RTE_RIB6_GET_NXT_COVER); + if (tmp == NULL) { + tmp = rte_rib6_lookup(rib, ip); + if (tmp != NULL) { + rte_rib6_get_depth(tmp, &tmp_depth); + parent_depth = RTE_MAX(tmp_depth, 24); + } + depth_diff = RTE_ALIGN_CEIL(depth, 8) - + RTE_ALIGN_CEIL(parent_depth, 8); + depth_diff = depth_diff >> 3; + } + } + node = rte_rib6_lookup_exact(rib, ip_masked, depth); + switch (op) { + case RTE_FIB6_ADD: + if (node != NULL) { + rte_rib6_get_nh(node, &node_nh); + if (node_nh == next_hop) + return 0; + ret = modify_dp(dp, rib, ip_masked, depth, next_hop); + if (ret == 0) + rte_rib6_set_nh(node, next_hop); + return 0; + } + + if ((depth > 24) && (dp->rsvd_tbl8s >= + dp->number_tbl8s - depth_diff)) + return -ENOSPC; + + node = rte_rib6_insert(rib, ip_masked, depth); + if (node == NULL) + return -rte_errno; + rte_rib6_set_nh(node, next_hop); + parent = rte_rib6_lookup_parent(node); + if (parent != NULL) { + rte_rib6_get_nh(parent, &par_nh); + if (par_nh == next_hop) + return 0; + } + ret = modify_dp(dp, rib, ip_masked, depth, next_hop); + if (ret != 0) { + rte_rib6_remove(rib, ip_masked, depth); + return ret; + } + + dp->rsvd_tbl8s += depth_diff; + return 0; + case RTE_FIB6_DEL: + if (node == NULL) + return -ENOENT; + + parent = rte_rib6_lookup_parent(node); + if (parent != NULL) { + rte_rib6_get_nh(parent, &par_nh); + rte_rib6_get_nh(node, &node_nh); + if (par_nh != node_nh) + ret = modify_dp(dp, rib, ip_masked, depth, + par_nh); + } else + ret = modify_dp(dp, rib, ip_masked, depth, dp->def_nh); + + if (ret != 0) + return ret; + rte_rib6_remove(rib, ip, depth); + + dp->rsvd_tbl8s -= depth_diff; + return 0; + default: + break; + } + return -EINVAL; +} + +void * +trie_create(const char *name, int socket_id, + struct rte_fib6_conf *conf) +{ + char mem_name[TRIE_NAMESIZE]; + struct rte_trie_tbl *dp = NULL; + uint64_t def_nh; + uint32_t num_tbl8; + enum rte_fib_trie_nh_sz nh_sz; + + if ((name == NULL) || (conf == NULL) || + (conf->trie.nh_sz < RTE_FIB6_TRIE_2B) || + (conf->trie.nh_sz > RTE_FIB6_TRIE_8B) || + (conf->trie.num_tbl8 > + get_max_nh(conf->trie.nh_sz)) || + (conf->trie.num_tbl8 == 0) || + (conf->default_nh > + get_max_nh(conf->trie.nh_sz))) { + + rte_errno = EINVAL; + return NULL; + } + + def_nh = conf->default_nh; + nh_sz = conf->trie.nh_sz; + num_tbl8 = conf->trie.num_tbl8; + + snprintf(mem_name, sizeof(mem_name), "DP_%s", name); + dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) + + TRIE_TBL24_NUM_ENT * (1 << nh_sz), RTE_CACHE_LINE_SIZE, + socket_id); + if (dp == NULL) { + rte_errno = ENOMEM; + return dp; + } + + write_to_dp(&dp->tbl24, (def_nh << 1), nh_sz, 1 << 24); + + snprintf(mem_name, sizeof(mem_name), "TBL8_%p", dp); + dp->tbl8 = rte_zmalloc_socket(mem_name, TRIE_TBL8_GRP_NUM_ENT * + (1ll << nh_sz) * (num_tbl8 + 1), + RTE_CACHE_LINE_SIZE, socket_id); + if (dp->tbl8 == NULL) { + rte_errno = ENOMEM; + rte_free(dp); + return NULL; + } + dp->def_nh = def_nh; + dp->nh_sz = nh_sz; + dp->number_tbl8s = num_tbl8; + + snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%p", dp); + dp->tbl8_pool = rte_zmalloc_socket(mem_name, + sizeof(uint32_t) * dp->number_tbl8s, + RTE_CACHE_LINE_SIZE, socket_id); + if (dp->tbl8_pool == NULL) { + rte_errno = ENOMEM; + rte_free(dp->tbl8); + rte_free(dp); + return NULL; + } + + tbl8_pool_init(dp); + + return dp; +} + +void +trie_free(void *p) +{ + struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p; + + rte_free(dp->tbl8_pool); + rte_free(dp->tbl8); + rte_free(dp); +} + diff --git a/lib/librte_fib/trie.h b/lib/librte_fib/trie.h new file mode 100644 index 0000000..7762fb9 --- /dev/null +++ b/lib/librte_fib/trie.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Vladimir Medvedkin + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _TRIE_H_ +#define _TRIE_H_ + +/** + * @file + * RTE IPv6 Longest Prefix Match (LPM) + */ + +#ifdef __cplusplus +extern "C" { +#endif + +void * +trie_create(const char *name, int socket_id, struct rte_fib6_conf *conf); + +void +trie_free(void *p); + +rte_fib6_lookup_fn_t +rte_trie_get_lookup_fn(struct rte_fib6_conf *fib_conf); + +int +trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE], + uint8_t depth, uint64_t next_hop, int op); + + +#ifdef __cplusplus +} +#endif + +#endif /* _TRIE_H_ */ +