[v3,1/4] hash: add k32v64 hash library

Message ID 0e767e9171c4e90d57ec06b50d6bf3b7d79828b1.1586974411.git.vladimir.medvedkin@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: add new k32v64 hash table

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/Intel-compilation fail Compilation issues

Commit Message

Vladimir Medvedkin April 15, 2020, 6:17 p.m. UTC
  K32V64 hash is a hash table that supports 32-bit keys and 64-bit values.
The table is hash-function agnostic, so the user must provide a
precalculated hash signature for add/delete/lookup operations.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/Makefile                           |   2 +-
 lib/librte_hash/Makefile               |  13 +-
 lib/librte_hash/k32v64_hash_avx512vl.c |  56 ++++++
 lib/librte_hash/meson.build            |  17 +-
 lib/librte_hash/rte_hash_version.map   |   6 +-
 lib/librte_hash/rte_k32v64_hash.c      | 315 +++++++++++++++++++++++++++++++++
 lib/librte_hash/rte_k32v64_hash.h      | 211 ++++++++++++++++++++++
 7 files changed, 615 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
 create mode 100644 lib/librte_hash/rte_k32v64_hash.c
 create mode 100644 lib/librte_hash/rte_k32v64_hash.h
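
For illustration, a minimal usage sketch of the proposed API (not part of the
patch; the table is hash-function agnostic, so rte_hash_crc_4byte() below is
just one possible signature function, and the name/size/socket values are
arbitrary):

	#include <rte_hash_crc.h>
	#include <rte_k32v64_hash.h>

	struct rte_k32v64_hash_params params = {
		.name = "example",
		.entries = 1 << 16,
		.socket_id = SOCKET_ID_ANY,
	};
	struct rte_k32v64_hash_table *ht = rte_k32v64_hash_create(&params);
	/* ht == NULL on failure, with rte_errno set */

	uint32_t key = 42;
	uint32_t hash = rte_hash_crc_4byte(key, 0); /* caller computes the hash */
	uint64_t value;

	rte_k32v64_hash_add(ht, key, hash, 100);
	if (rte_k32v64_hash_lookup(ht, key, hash, &value) == 0)
		;	/* value == 100 */
	rte_k32v64_hash_delete(ht, key, hash);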
  

Comments

Mattias Rönnblom April 15, 2020, 6:59 p.m. UTC | #1
On 2020-04-15 20:17, Vladimir Medvedkin wrote:
> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
> This table is hash function agnostic so user must provide
> precalculated hash signature for add/delete/lookup operations.
>
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>   lib/Makefile                           |   2 +-
>   lib/librte_hash/Makefile               |  13 +-
>   lib/librte_hash/k32v64_hash_avx512vl.c |  56 ++++++
>   lib/librte_hash/meson.build            |  17 +-
>   lib/librte_hash/rte_hash_version.map   |   6 +-
>   lib/librte_hash/rte_k32v64_hash.c      | 315 +++++++++++++++++++++++++++++++++
>   lib/librte_hash/rte_k32v64_hash.h      | 211 ++++++++++++++++++++++
>   7 files changed, 615 insertions(+), 5 deletions(-)
>   create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
>   create mode 100644 lib/librte_hash/rte_k32v64_hash.c
>   create mode 100644 lib/librte_hash/rte_k32v64_hash.h
>
> diff --git a/lib/Makefile b/lib/Makefile
> index 46b91ae..a8c02e4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>   DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
>   			librte_net librte_hash librte_cryptodev
>   DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
> -DEPDIRS-librte_hash := librte_eal librte_ring
> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>   DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>   DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>   DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> index ec9f864..023689d 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -8,13 +8,14 @@ LIB = librte_hash.a
>   
>   CFLAGS += -O3
>   CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> -LDLIBS += -lrte_eal -lrte_ring
> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
>   
>   EXPORT_MAP := rte_hash_version.map
>   
>   # all source are stored in SRCS-y
>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_k32v64_hash.c
>   
>   # install this header file
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
> @@ -27,5 +28,15 @@ endif
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_k32v64_hash.h
> +
> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 | \
> +grep -q __AVX512VL__ && echo 1)
> +
> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
> +	CFLAGS_rte_k32v64_hash.o += -DCC_AVX512VL_SUPPORT
> +endif
>   
>   include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c b/lib/librte_hash/k32v64_hash_avx512vl.c
> new file mode 100644
> index 0000000..7c70dd2
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <rte_k32v64_hash.h>
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +
> +static inline int
> +k32v64_cmp_keys_avx512vl(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	__m128i keys, srch_key;
> +	__mmask8 msk;
> +
> +	keys = _mm_load_si128((void *)bucket);
> +	srch_key = _mm_set1_epi32(key);
> +
> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
> +	if (msk) {
> +		*val = bucket->val[__builtin_ctz(msk)];
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +k32v64_hash_lookup_avx512vl(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		k32v64_cmp_keys_avx512vl);
> +}
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n)
> +{
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
> index 6ab46ae..8a37cc4 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -3,10 +3,23 @@
>   
>   headers = files('rte_crc_arm64.h',
>   	'rte_fbk_hash.h',
> +	'rte_k32v64_hash.h',
>   	'rte_hash_crc.h',
>   	'rte_hash.h',
>   	'rte_jhash.h',
>   	'rte_thash.h')
>   
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
> -deps += ['ring']
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_k32v64_hash.c')
> +deps += ['ring', 'mempool']
> +
> +if dpdk_conf.has('RTE_ARCH_X86')
> +	if cc.has_argument('-mavx512vl')
> +		avx512_tmplib = static_library('avx512_tmp',
> +                                'k32v64_hash_avx512vl.c',
> +                                dependencies: static_rte_mempool,
> +                                c_args: cflags + ['-mavx512vl'])
> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
> +                cflags += '-DCC_AVX512VL_SUPPORT'
> +
> +	endif
> +endif
> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
> index a8fbbc3..9a4f2f6 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -34,5 +34,9 @@ EXPERIMENTAL {
>   
>   	rte_hash_free_key_with_position;
>   	rte_hash_max_key_id;
> -
> +	rte_k32v64_hash_create;
> +	rte_k32v64_hash_find_existing;
> +	rte_k32v64_hash_free;
> +	rte_k32v64_hash_add;
> +	rte_k32v64_hash_delete;
>   };
> diff --git a/lib/librte_hash/rte_k32v64_hash.c b/lib/librte_hash/rte_k32v64_hash.c
> new file mode 100644
> index 0000000..7ed94b4
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.c
> @@ -0,0 +1,315 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_k32v64_hash.h>
> +
> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
> +	.name = "RTE_K32V64_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
> +
> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +#endif
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t *keys,
> +	uint32_t *hashes, uint64_t *values, unsigned int n)
> +{
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +static rte_k32v64_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +		return k32v64_hash_bulk_lookup_avx512vl;
> +#endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			table->t[bucket].val[i] = value;
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				ent->val = value;
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		rte_smp_wmb();
> +		table->t[bucket].key_mask |= 1 << idx;
> +		table->nb_ent++;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	return 0;
> +}
> +
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct rte_k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->nb_ext_ent--;
> +			} else
> +				table->t[bucket].key_mask &= ~(1 << i);
> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent, next);
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name)
> +{
> +	struct rte_k32v64_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +			rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		h = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params)
> +{
> +	char hash_name[RTE_K32V64_HASH_NAMESIZE];
> +	struct rte_k32v64_hash_table *ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +		rte_k32v64_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params->name);
> +	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
> +	mem_size = sizeof(struct rte_k32v64_hash_table) +
> +		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		ht = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(params->name, ht->name,
> +				RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	ht = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
> +		rte_free(te);
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	memcpy(ht->name, hash_name, sizeof(ht->name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->lookup = get_lookup_bulk_fn();
> +
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +}
> +
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht)
> +{
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +				rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(k32v64_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	rte_mempool_free(ht->ext_ent_pool);
> +	rte_free(ht);
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_k32v64_hash.h b/lib/librte_hash/rte_k32v64_hash.h
> new file mode 100644
> index 0000000..b2c52e9
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.h
> @@ -0,0 +1,211 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_K32V64_HASH_H_
> +#define _RTE_K32V64_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_K32V64_HASH_NAMESIZE		32
> +#define RTE_K32V64_KEYS_PER_BUCKET		4
> +#define RTE_K32V64_WRITE_IN_PROGRESS		1
> +
> +struct rte_k32v64_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +};
> +
> +struct rte_k32v64_ext_ent {
> +	SLIST_ENTRY(rte_k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct rte_k32v64_hash_bucket {
> +	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	rte_atomic32_t	cnt;
> +	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head;
> +} __rte_cache_aligned;
> +
> +struct rte_k32v64_hash_table;
> +
> +typedef int (*rte_k32v64_hash_bulk_lookup_t)
> +(struct rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
> +	uint64_t *values, unsigned int n);
> +
> +struct rte_k32v64_hash_table {
> +	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the hash. */
> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	rte_k32v64_hash_bulk_lookup_t	lookup;
> +	__extension__ struct rte_k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*rte_k32v64_cmp_fn_t)
> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	int i;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f)
> +{
> +	uint64_t	val = 0;
> +	struct rte_k32v64_ext_ent *ent;
> +	int32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +		do
> +			cnt = rte_atomic32_read(&table->t[bucket].cnt);
> +		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
> +static inline int
> +rte_k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value, __k32v64_cmp_keys);
> +}
> +
> +static inline int
> +rte_k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n)
> +{
> +	return table->lookup(table, keys, hashes, values, n);
> +}
> +
> +/**
> + * Add a key to an existing hash table with hash value.
> + * This operation is not multi-thread safe
> + * and should only be called from one thread.
> + *


Does this hash allow multiple readers, and at most one writer, for a 
particular instance? If that's the case, it should probably be mentioned 
somewhere.


Just specifying that a particular function is not MT safe doesn't say
anything about whether it's safe to call it in parallel with other
non-MT-safe functions. I assume you can't add and delete in parallel?


> + * @param ht
> + *   Hash table to add the key to.
> + * @param key
> + *   Key to add to the hash table.
> + * @param value
> + *   Value to associate with key.
> + * @param hash
> + *   Hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value);
> +
> +/**
> + * Remove a key with a given hash value from an existing hash table.
> + * This operation is not multi-thread
> + * safe and should only be called from one thread.
> + *
> + * @param ht
> + *   Hash table to remove the key from.
> + * @param key
> + *   Key to remove from the hash table.
> + * @param hash
> + *   hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash);
> +
> +
> +/**
> + * Performs a lookup for an existing hash table, and returns a pointer to
> + * the table if found.
> + *
> + * @param name
> + *   Name of the hash table to find
> + *
> + * @return
> + *   pointer to hash table structure or NULL on error with rte_errno
> + *   set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name);
> +
> +/**
> + * Create a new hash table for use with four byte keys.
> + *
> + * @param params
> + *   Parameters used in creation of hash table.
> + *
> + * @return
> + *   Pointer to hash table structure that is used in future hash table
> + *   operations, or NULL on error with rte_errno set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params);
> +
> +/**
> + * Free all memory used by a hash table.
> + *
> + * @param table
> + *   Hash table to deallocate.
> + */
> +__rte_experimental
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *table);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_K32V64_HASH_H_ */
  
Vladimir Medvedkin April 16, 2020, 10:23 a.m. UTC | #2
Hi Mattias,

-----Original Message-----
From: Mattias Rönnblom <mattias.ronnblom@ericsson.com> 
Sent: Wednesday, April 15, 2020 8:00 PM
To: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>; dev@dpdk.org
Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Wang, Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>
Subject: Re: [dpdk-dev] [PATCH v3 1/4] hash: add k32v64 hash library

On 2020-04-15 20:17, Vladimir Medvedkin wrote:
> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
> This table is hash function agnostic so user must provide 
> precalculated hash signature for add/delete/lookup operations.
>
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>   lib/Makefile                           |   2 +-
>   lib/librte_hash/Makefile               |  13 +-
>   lib/librte_hash/k32v64_hash_avx512vl.c |  56 ++++++
>   lib/librte_hash/meson.build            |  17 +-
>   lib/librte_hash/rte_hash_version.map   |   6 +-
>   lib/librte_hash/rte_k32v64_hash.c      | 315 +++++++++++++++++++++++++++++++++
>   lib/librte_hash/rte_k32v64_hash.h      | 211 ++++++++++++++++++++++
>   7 files changed, 615 insertions(+), 5 deletions(-)
>   create mode 100644 lib/librte_hash/k32v64_hash_avx512vl.c
>   create mode 100644 lib/librte_hash/rte_k32v64_hash.c
>   create mode 100644 lib/librte_hash/rte_k32v64_hash.h
>
> diff --git a/lib/Makefile b/lib/Makefile index 46b91ae..a8c02e4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>   DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
>   			librte_net librte_hash librte_cryptodev
>   DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash -DEPDIRS-librte_hash 
> := librte_eal librte_ring
> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>   DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
>   DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
>   DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib diff --git 
> a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index 
> ec9f864..023689d 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -8,13 +8,14 @@ LIB = librte_hash.a
>   
>   CFLAGS += -O3
>   CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -LDLIBS += -lrte_eal 
> -lrte_ring
> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
>   
>   EXPORT_MAP := rte_hash_version.map
>   
>   # all source are stored in SRCS-y
>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_k32v64_hash.c
>   
>   # install this header file
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h @@ -27,5 
> +28,15 @@ endif
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_k32v64_hash.h
> +
> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 
> +| \ grep -q __AVX512VL__ && echo 1)
> +
> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
> +	CFLAGS_rte_k32v64_hash.o += -DCC_AVX512VL_SUPPORT endif
>   
>   include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c 
> b/lib/librte_hash/k32v64_hash_avx512vl.c
> new file mode 100644
> index 0000000..7c70dd2
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <rte_k32v64_hash.h>
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +
> +static inline int
> +k32v64_cmp_keys_avx512vl(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	__m128i keys, srch_key;
> +	__mmask8 msk;
> +
> +	keys = _mm_load_si128((void *)bucket);
> +	srch_key = _mm_set1_epi32(key);
> +
> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
> +	if (msk) {
> +		*val = bucket->val[__builtin_ctz(msk)];
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +k32v64_hash_lookup_avx512vl(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		k32v64_cmp_keys_avx512vl);
> +}
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) 
> +{
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build 
> index 6ab46ae..8a37cc4 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -3,10 +3,23 @@
>   
>   headers = files('rte_crc_arm64.h',
>   	'rte_fbk_hash.h',
> +	'rte_k32v64_hash.h',
>   	'rte_hash_crc.h',
>   	'rte_hash.h',
>   	'rte_jhash.h',
>   	'rte_thash.h')
>   
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c') -deps += 
> ['ring']
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 
> +'rte_k32v64_hash.c') deps += ['ring', 'mempool']
> +
> +if dpdk_conf.has('RTE_ARCH_X86')
> +	if cc.has_argument('-mavx512vl')
> +		avx512_tmplib = static_library('avx512_tmp',
> +                                'k32v64_hash_avx512vl.c',
> +                                dependencies: static_rte_mempool,
> +                                c_args: cflags + ['-mavx512vl'])
> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
> +                cflags += '-DCC_AVX512VL_SUPPORT'
> +
> +	endif
> +endif
> diff --git a/lib/librte_hash/rte_hash_version.map 
> b/lib/librte_hash/rte_hash_version.map
> index a8fbbc3..9a4f2f6 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -34,5 +34,9 @@ EXPERIMENTAL {
>   
>   	rte_hash_free_key_with_position;
>   	rte_hash_max_key_id;
> -
> +	rte_k32v64_hash_create;
> +	rte_k32v64_hash_find_existing;
> +	rte_k32v64_hash_free;
> +	rte_k32v64_hash_add;
> +	rte_k32v64_hash_delete;
>   };
> diff --git a/lib/librte_hash/rte_k32v64_hash.c 
> b/lib/librte_hash/rte_k32v64_hash.c
> new file mode 100644
> index 0000000..7ed94b4
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.c
> @@ -0,0 +1,315 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_k32v64_hash.h>
> +
> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
> +	.name = "RTE_K32V64_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
> +
> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n); 
> +#endif
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t *keys,
> +	uint32_t *hashes, uint64_t *values, unsigned int n) {
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +static rte_k32v64_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +		return k32v64_hash_bulk_lookup_avx512vl; #endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			table->t[bucket].val[i] = value;
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				ent->val = value;
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		rte_smp_wmb();
> +		table->t[bucket].key_mask |= 1 << idx;
> +		table->nb_ent++;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	return 0;
> +}
> +
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct rte_k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->nb_ext_ent--;
> +			} else
> +				table->t[bucket].key_mask &= ~(1 << i);
> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent, next);
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name) {
> +	struct rte_k32v64_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +			rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		h = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params) {
> +	char hash_name[RTE_K32V64_HASH_NAMESIZE];
> +	struct rte_k32v64_hash_table *ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +		rte_k32v64_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params->name);
> +	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
> +	mem_size = sizeof(struct rte_k32v64_hash_table) +
> +		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		ht = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(params->name, ht->name,
> +				RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	ht = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
> +		rte_free(te);
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	memcpy(ht->name, hash_name, sizeof(ht->name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->lookup = get_lookup_bulk_fn();
> +
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +}
> +
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht) {
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +				rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(k32v64_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	rte_mempool_free(ht->ext_ent_pool);
> +	rte_free(ht);
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_k32v64_hash.h 
> b/lib/librte_hash/rte_k32v64_hash.h
> new file mode 100644
> index 0000000..b2c52e9
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.h
> @@ -0,0 +1,211 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#ifndef _RTE_K32V64_HASH_H_
> +#define _RTE_K32V64_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_K32V64_HASH_NAMESIZE		32
> +#define RTE_K32V64_KEYS_PER_BUCKET		4
> +#define RTE_K32V64_WRITE_IN_PROGRESS		1
> +
> +struct rte_k32v64_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +};
> +
> +struct rte_k32v64_ext_ent {
> +	SLIST_ENTRY(rte_k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct rte_k32v64_hash_bucket {
> +	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	rte_atomic32_t	cnt;
> +	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head; } 
> +__rte_cache_aligned;
> +
> +struct rte_k32v64_hash_table;
> +
> +typedef int (*rte_k32v64_hash_bulk_lookup_t) (struct 
> +rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
> +	uint64_t *values, unsigned int n);
> +
> +struct rte_k32v64_hash_table {
> +	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the hash. */
> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	rte_k32v64_hash_bulk_lookup_t	lookup;
> +	__extension__ struct rte_k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*rte_k32v64_cmp_fn_t)
> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	int i;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f) {
> +	uint64_t	val = 0;
> +	struct rte_k32v64_ext_ent *ent;
> +	int32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +		do
> +			cnt = rte_atomic32_read(&table->t[bucket].cnt);
> +		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
> +static inline int
> +rte_k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value, 
> +__k32v64_cmp_keys); }
> +
> +static inline int
> +rte_k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) 
> +{
> +	return table->lookup(table, keys, hashes, values, n); }
> +
> +/**
> + * Add a key to an existing hash table with hash value.
> + * This operation is not multi-thread safe
> + * and should only be called from one thread.
> + *


Does this hash allow multiple readers, and at most one writer, for a particular instance? If that's the case, it should probably be mentioned somewhere.


Just specifying that a particular function is not MT safe doesn't say anything if it's safe to call it in parallel with other, non-MT safe functions. I assume you can't add and delete in parallel?


Yes, this hash allows multiple readers and one writer. Writers must be serialized. I will add this information. Thanks!
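
For example, writer threads would have to take an application-level lock around
add/delete, while lookup threads run without one (illustrative sketch only,
using an rte_spinlock owned by the application):

	static rte_spinlock_t k32v64_wr_lock = RTE_SPINLOCK_INITIALIZER;

	/* writer threads: serialize updates themselves */
	rte_spinlock_lock(&k32v64_wr_lock);
	rte_k32v64_hash_add(ht, key, hash, value);
	rte_spinlock_unlock(&k32v64_wr_lock);

	/* reader threads: no lock, may run concurrently with the writer */
	rte_k32v64_hash_lookup(ht, key, hash, &value);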


> + * @param ht
> + *   Hash table to add the key to.
> + * @param key
> + *   Key to add to the hash table.
> + * @param value
> + *   Value to associate with key.
> + * @param hash
> + *   Hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value);
> +
> +/**
> + * Remove a key with a given hash value from an existing hash table.
> + * This operation is not multi-thread
> + * safe and should only be called from one thread.
> + *
> + * @param ht
> + *   Hash table to remove the key from.
> + * @param key
> + *   Key to remove from the hash table.
> + * @param hash
> + *   hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash);
> +
> +
> +/**
> + * Performs a lookup for an existing hash table, and returns a 
> +pointer to
> + * the table if found.
> + *
> + * @param name
> + *   Name of the hash table to find
> + *
> + * @return
> + *   pointer to hash table structure or NULL on error with rte_errno
> + *   set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name);
> +
> +/**
> + * Create a new hash table for use with four byte keys.
> + *
> + * @param params
> + *   Parameters used in creation of hash table.
> + *
> + * @return
> + *   Pointer to hash table structure that is used in future hash table
> + *   operations, or NULL on error with rte_errno set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params);
> +
> +/**
> + * Free all memory used by a hash table.
> + *
> + * @param table
> + *   Hash table to deallocate.
> + */
> +__rte_experimental
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *table);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_K32V64_HASH_H_ */
  
Ananyev, Konstantin April 23, 2020, 1:31 p.m. UTC | #3
Hi Vladimir,

Apologies for the late review.
My comments below. 

> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
> This table is hash function agnostic so user must provide
> precalculated hash signature for add/delete/lookup operations.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
> 
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.c
> @@ -0,0 +1,315 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_k32v64_hash.h>
> +
> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
> +	.name = "RTE_K32V64_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
> +
> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +#endif
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t *keys,
> +	uint32_t *hashes, uint64_t *values, unsigned int n)
> +{
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +static rte_k32v64_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +		return k32v64_hash_bulk_lookup_avx512vl;
> +#endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +

I think for add you also need to update bucket.cnt
at the start/end of the update (as you do for del).
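
Something along these lines, mirroring what the delete path already does
(rough, untested sketch around the in-bucket insert in rte_k32v64_hash_add()):

	rte_atomic32_inc(&table->t[bucket].cnt);	/* odd: write in progress */
	table->t[bucket].key[idx] = key;
	table->t[bucket].val[idx] = value;
	rte_smp_wmb();
	table->t[bucket].key_mask |= 1 << idx;
	rte_atomic32_inc(&table->t[bucket].cnt);	/* even again: write done */
	table->nb_ent++;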
 
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			table->t[bucket].val[i] = value;
> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				ent->val = value;
> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		rte_smp_wmb();
> +		table->t[bucket].key_mask |= 1 << idx;
> +		table->nb_ent++;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	return 0;
> +}
> +
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash)
> +{
> +	uint32_t bucket;
> +	int i;
> +	struct rte_k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				rte_atomic32_inc(&table->t[bucket].cnt);

I know that right now rte_atomic32 uses _sync gcc builtins underneath,
so it should be safe.
But I think the proper way would be:
	table->t[bucket].cnt++;
	rte_smp_wmb();
or, as an alternative, use C11 atomics with ACQUIRE/RELEASE ordering.
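
Applied to the delete path that would look roughly like this (untested sketch;
it assumes cnt is turned into a plain uint32_t instead of rte_atomic32_t):

	table->t[bucket].cnt++;			/* odd: write in progress */
	rte_smp_wmb();
	table->t[bucket].key[i] = ent->key;
	table->t[bucket].val[i] = ent->val;
	rte_smp_wmb();
	table->t[bucket].cnt++;			/* even: write complete */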

> +				table->t[bucket].key[i] = ent->key;
> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->nb_ext_ent--;
> +			} else
> +				table->t[bucket].key_mask &= ~(1 << i);

I think you should protect that update with bucket.cnt as well.
From my perspective, as a rule of thumb, any update to the bucket/list
should be within that transaction start/end.

> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent, next);
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	rte_mempool_put(table->ext_ent_pool, ent);
> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name)
> +{
> +	struct rte_k32v64_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +			rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		h = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params)
> +{
> +	char hash_name[RTE_K32V64_HASH_NAMESIZE];
> +	struct rte_k32v64_hash_table *ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +		rte_k32v64_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params->name);
> +	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
> +	mem_size = sizeof(struct rte_k32v64_hash_table) +
> +		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		ht = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(params->name, ht->name,
> +				RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	ht = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
> +		rte_free(te);
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	memcpy(ht->name, hash_name, sizeof(ht->name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->lookup = get_lookup_bulk_fn();
> +
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +}
> +
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht)
> +{
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +				rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(k32v64_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	rte_mempool_free(ht->ext_ent_pool);
> +	rte_free(ht);
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_k32v64_hash.h b/lib/librte_hash/rte_k32v64_hash.h
> new file mode 100644
> index 0000000..b2c52e9
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.h
> @@ -0,0 +1,211 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_K32V64_HASH_H_
> +#define _RTE_K32V64_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_K32V64_HASH_NAMESIZE		32
> +#define RTE_K32V64_KEYS_PER_BUCKET		4
> +#define RTE_K32V64_WRITE_IN_PROGRESS		1
> +
> +struct rte_k32v64_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +};
> +
> +struct rte_k32v64_ext_ent {
> +	SLIST_ENTRY(rte_k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct rte_k32v64_hash_bucket {
> +	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	rte_atomic32_t	cnt;
> +	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head;
> +} __rte_cache_aligned;
> +
> +struct rte_k32v64_hash_table;
> +
> +typedef int (*rte_k32v64_hash_bulk_lookup_t)
> +(struct rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
> +	uint64_t *values, unsigned int n);
> +
> +struct rte_k32v64_hash_table {
> +	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the hash. */
> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	rte_k32v64_hash_bulk_lookup_t	lookup;
> +	__extension__ struct rte_k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*rte_k32v64_cmp_fn_t)
> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	int i;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f)
> +{
> +	uint64_t	val = 0;
> +	struct rte_k32v64_ext_ent *ent;
> +	int32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +		do
> +			cnt = rte_atomic32_read(&table->t[bucket].cnt);
> +		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));

AFAIK atomic32_read is just a normal read op, so it can be reordered with other ops.
So this construction doesn't protect you from races.
What you probably need here:

	do {
		cnt1 = table->t[bucket].cnt;
		rte_smp_rmb();
		....
		rte_smp_rmb();
		cnt2 = table->t[bucket].cnt;
	} while (cnt1 != cnt2 || (cnt1 & RTE_K32V64_WRITE_IN_PROGRESS) != 0);

> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
  
Honnappa Nagarahalli April 29, 2020, 9:29 p.m. UTC | #4
Hi Vladimir,
	I am not sure which way the decision will go, but a few comments inline. Please use C11 built-ins for synchronization.
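
For instance, the bucket counter accesses could map to builtins roughly as
follows (sketch only; it assumes cnt becomes a plain uint32_t, and the exact
memory order still has to be chosen per call site):

	/* reader: begin/end of the retry loop */
	cnt = __atomic_load_n(&table->t[bucket].cnt, __ATOMIC_ACQUIRE);

	/* writer: closing increment that publishes the update */
	__atomic_store_n(&table->t[bucket].cnt, cnt + 1, __ATOMIC_RELEASE);

	/* writer: the opening increment still needs rte_smp_wmb() (or a
	 * release fence) after it so the bucket stores cannot move before it */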

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Vladimir Medvedkin
> Sent: Wednesday, April 15, 2020 1:17 PM
> To: dev@dpdk.org
> Cc: konstantin.ananyev@intel.com; yipeng1.wang@intel.com;
> sameh.gobriel@intel.com; bruce.richardson@intel.com
> Subject: [dpdk-dev] [PATCH v3 1/4] hash: add k32v64 hash library
> 
> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
> This table is hash function agnostic so user must provide precalculated hash
> signature for add/delete/lookup operations.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/Makefile                           |   2 +-
>  lib/librte_hash/Makefile               |  13 +-
>  lib/librte_hash/k32v64_hash_avx512vl.c |  56 ++++++
>  lib/librte_hash/meson.build            |  17 +-
>  lib/librte_hash/rte_hash_version.map   |   6 +-
>  lib/librte_hash/rte_k32v64_hash.c      | 315
> +++++++++++++++++++++++++++++++++
>  lib/librte_hash/rte_k32v64_hash.h      | 211 ++++++++++++++++++++++
>  7 files changed, 615 insertions(+), 5 deletions(-)  create mode 100644
> lib/librte_hash/k32v64_hash_avx512vl.c
>  create mode 100644 lib/librte_hash/rte_k32v64_hash.c  create mode
> 100644 lib/librte_hash/rte_k32v64_hash.h
> 
> diff --git a/lib/Makefile b/lib/Makefile index 46b91ae..a8c02e4 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
> DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev
> \
>  			librte_net librte_hash librte_cryptodev
>  DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash -DEPDIRS-librte_hash :=
> librte_eal librte_ring
> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>  DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd  DEPDIRS-librte_efd :=
> librte_eal librte_ring librte_hash
>  DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib diff --git
> a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index
> ec9f864..023689d 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -8,13 +8,14 @@ LIB = librte_hash.a
> 
>  CFLAGS += -O3
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> -LDLIBS += -lrte_eal -lrte_ring
> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
> 
>  EXPORT_MAP := rte_hash_version.map
> 
>  # all source are stored in SRCS-y
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>  SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_k32v64_hash.c
> 
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h @@ -27,5
> +28,15 @@ endif  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> rte_jhash.h  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_k32v64_hash.h
> +
> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1
> |
> +\ grep -q __AVX512VL__ && echo 1)
> +
> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
> +	CFLAGS_rte_k32v64_hash.o += -DCC_AVX512VL_SUPPORT endif
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c
> b/lib/librte_hash/k32v64_hash_avx512vl.c
> new file mode 100644
> index 0000000..7c70dd2
> --- /dev/null
> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <rte_k32v64_hash.h>
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +
> +static inline int
> +k32v64_cmp_keys_avx512vl(struct rte_k32v64_hash_bucket *bucket,
> uint32_t key,
> +	uint64_t *val)
> +{
> +	__m128i keys, srch_key;
> +	__mmask8 msk;
> +
> +	keys = _mm_load_si128((void *)bucket);
> +	srch_key = _mm_set1_epi32(key);
> +
> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys,
> srch_key);
> +	if (msk) {
> +		*val = bucket->val[__builtin_ctz(msk)];
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +k32v64_hash_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +		k32v64_cmp_keys_avx512vl);
> +}
> +
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) {
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> 6ab46ae..8a37cc4 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -3,10 +3,23 @@
> 
>  headers = files('rte_crc_arm64.h',
>  	'rte_fbk_hash.h',
> +	'rte_k32v64_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
>  	'rte_thash.h')
> 
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c') -deps += ['ring']
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c',
> +'rte_k32v64_hash.c') deps += ['ring', 'mempool']
> +
> +if dpdk_conf.has('RTE_ARCH_X86')
> +	if cc.has_argument('-mavx512vl')
> +		avx512_tmplib = static_library('avx512_tmp',
> +                                'k32v64_hash_avx512vl.c',
> +                                dependencies: static_rte_mempool,
> +                                c_args: cflags + ['-mavx512vl'])
> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
> +                cflags += '-DCC_AVX512VL_SUPPORT'
> +
> +	endif
> +endif
> diff --git a/lib/librte_hash/rte_hash_version.map
> b/lib/librte_hash/rte_hash_version.map
> index a8fbbc3..9a4f2f6 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -34,5 +34,9 @@ EXPERIMENTAL {
> 
>  	rte_hash_free_key_with_position;
>  	rte_hash_max_key_id;
> -
> +	rte_k32v64_hash_create;
> +	rte_k32v64_hash_find_existing;
> +	rte_k32v64_hash_free;
> +	rte_k32v64_hash_add;
> +	rte_k32v64_hash_delete;
>  };
> diff --git a/lib/librte_hash/rte_k32v64_hash.c
> b/lib/librte_hash/rte_k32v64_hash.c
> new file mode 100644
> index 0000000..7ed94b4
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.c
> @@ -0,0 +1,315 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <string.h>
> +
> +#include <rte_eal_memconfig.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memory.h>
> +#include <rte_tailq.h>
> +
> +#include <rte_k32v64_hash.h>
> +
> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
> +
> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
> +	.name = "RTE_K32V64_HASH",
> +};
> +
> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
> +
> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
> +
> +#ifdef CC_AVX512VL_SUPPORT
> +int
> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
> +#endif
> +
> +static int
> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t
> *keys,
> +	uint32_t *hashes, uint64_t *values, unsigned int n) {
> +	int ret, cnt = 0;
> +	unsigned int i;
> +
> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
> +			(values == NULL)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
> +			&values[i]);
> +		if (ret == 0)
> +			cnt++;
> +	}
> +	return cnt;
> +}
> +
> +static rte_k32v64_hash_bulk_lookup_t
> +get_lookup_bulk_fn(void)
> +{
> +#ifdef CC_AVX512VL_SUPPORT
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
> +		return k32v64_hash_bulk_lookup_avx512vl; #endif
> +	return k32v64_hash_bulk_lookup;
> +}
> +
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value)
> +{
> +	uint32_t bucket;
> +	int i, idx, ret;
> +	uint8_t msk;
> +	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +	/* Search key in table. Update value if exists */
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			table->t[bucket].val[i] = value;
The old value of val[i] might be getting used in the reader. It needs to be returned to the caller so that it can be put on the RCU defer queue to free later.
You might also want to use an atomic store to ensure the stores are not split.
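Roughly something like this (just a sketch; 'old_val' here is a hypothetical out-parameter that would have to be added to the API):

		if ((key == table->t[bucket].key[i]) &&
				(table->t[bucket].key_mask & (1 << i))) {
			if (old_val != NULL)
				*old_val = table->t[bucket].val[i];
			/* single atomic store, so a reader never sees a torn
			 * value (assumes a lock-free 64 bit store, i.e. a
			 * 64 bit target)
			 */
			__atomic_store_n(&table->t[bucket].val[i], value,
					__ATOMIC_RELAXED);
			return 0;
		}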

> +			return 0;
> +		}
> +	}
> +
> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +			if (ent->key == key) {
> +				ent->val = value;
Same here, need to return the old value.

> +				return 0;
> +			}
> +		}
> +	}
> +
> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
> +	if (msk) {
> +		idx = __builtin_ctz(msk);
> +		table->t[bucket].key[idx] = key;
> +		table->t[bucket].val[idx] = value;
> +		rte_smp_wmb();
> +		table->t[bucket].key_mask |= 1 << idx;
The barrier and the store can be replaced with a store-release in C11.
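For example (sketch only):

		table->t[bucket].key[idx] = key;
		table->t[bucket].val[idx] = value;
		/* release makes key/val visible before the guard bit is set */
		__atomic_store_n(&table->t[bucket].key_mask,
				table->t[bucket].key_mask | (1 << idx),
				__ATOMIC_RELEASE);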

> +		table->nb_ent++;
> +		return 0;
> +	}
> +
> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
> +	if (ret < 0)
> +		return ret;
> +
> +	SLIST_NEXT(ent, next) = NULL;
> +	ent->key = key;
> +	ent->val = value;
> +	rte_smp_wmb();
> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
> +		prev = tmp;
> +
> +	if (prev == NULL)
> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
> +	else
> +		SLIST_INSERT_AFTER(prev, ent, next);
Need C11 SLIST

> +
> +	table->nb_ent++;
> +	table->nb_ext_ent++;
> +	return 0;
> +}
> +
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash)
This should return the value corresponding to the deleted key

> +{
> +	uint32_t bucket;
> +	int i;
> +	struct rte_k32v64_ext_ent *ent;
> +
> +	if (table == NULL)
> +		return -EINVAL;
> +
> +	bucket = hash & table->bucket_msk;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == table->t[bucket].key[i]) &&
> +				(table->t[bucket].key_mask & (1 << i))) {
> +			ent = SLIST_FIRST(&table->t[bucket].head);
> +			if (ent) {
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->t[bucket].key[i] = ent->key;
In this case, both key and value are changing and it is not atomic. There is a possibility that the lookup function will receive incorrect value of 'val[i]'. Suggest following the method described below.

> +				table->t[bucket].val[i] = ent->val;
> +				SLIST_REMOVE_HEAD(&table->t[bucket].head,
> next);
> +				rte_atomic32_inc(&table->t[bucket].cnt);
> +				table->nb_ext_ent--;
> +			} else
Suggest changing this into a 2 step process:
1) Delete the entry from the fixed bucket (update key_mask)
2) Move the head of extended bucket to the fixed bucket
	2a) Insert the key/value from the head into the deleted index and update the key_mask (this will ensure that the reader has the entry available while it is being removed from the extended bucket)
	2b) increment the counter indicating the extended bucket is changing
	2c) remove the head
	2d) increment the counter indicating the extended bucket change is done

The above procedure will result in removing the spinning (resulting from step 2b) in the lookup function (comment is added in the lookup function), readers will not be blocked if the writer is scheduled out.

This logic is implemented in rte_hash using cuckoo hash, suggest to take a look for required memory orderings.
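A very rough sketch of the steps above (untested, exact memory orderings and the 32-bit atomicity question left aside; 'new_msk' is just a local helper):

		/* 1) drop the key from the fixed bucket */
		new_msk = table->t[bucket].key_mask & ~(1 << i);
		__atomic_store_n(&table->t[bucket].key_mask, new_msk,
				__ATOMIC_RELEASE);
		ent = SLIST_FIRST(&table->t[bucket].head);
		if (ent != NULL) {
			/* 2a) copy the head of the extended bucket into the
			 * freed slot, then re-publish the slot via the guard bit
			 */
			table->t[bucket].key[i] = ent->key;
			table->t[bucket].val[i] = ent->val;
			__atomic_store_n(&table->t[bucket].key_mask,
					new_msk | (1 << i), __ATOMIC_RELEASE);
			/* 2b) - 2d) the counter brackets only the list removal */
			rte_atomic32_inc(&table->t[bucket].cnt);
			SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
			rte_atomic32_inc(&table->t[bucket].cnt);
			table->nb_ext_ent--;
		}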

> +				table->t[bucket].key_mask &= ~(1 << i);
> +			if (ent)
> +				rte_mempool_put(table->ext_ent_pool, ent);
Entry cannot be put to free list immediately as the readers are still using it.

> +			table->nb_ent--;
> +			return 0;
> +		}
> +	}
> +
> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
> +		if (ent->key == key)
> +			break;
> +
> +	if (ent == NULL)
> +		return -ENOENT;
> +
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent,
> next);
> +	rte_atomic32_inc(&table->t[bucket].cnt);
> +	rte_mempool_put(table->ext_ent_pool, ent);
Entry might be getting used by the readers still.

> +
> +	table->nb_ext_ent--;
> +	table->nb_ent--;
> +
> +	return 0;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name) {
> +	struct rte_k32v64_hash_table *h = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +			rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		h = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE)
> == 0)
> +			break;
> +	}
> +	rte_mcfg_tailq_read_unlock();
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +	return h;
> +}
> +
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params) {
> +	char hash_name[RTE_K32V64_HASH_NAMESIZE];
> +	struct rte_k32v64_hash_table *ht = NULL;
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +	uint32_t mem_size, nb_buckets, max_ent;
> +	int ret;
> +	struct rte_mempool *mp;
> +
> +	if ((params == NULL) || (params->name == NULL) ||
> +			(params->entries == 0)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +		rte_k32v64_hash_list);
> +
> +	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params-
> >name);
> +	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
> +		rte_errno = ENAMETOOLONG;
> +		return NULL;
> +	}
> +
> +	max_ent = rte_align32pow2(params->entries);
> +	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
> +	mem_size = sizeof(struct rte_k32v64_hash_table) +
> +		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
> +
> +	mp = rte_mempool_create(hash_name, max_ent,
> +		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL,
> NULL,
> +		params->socket_id, 0);
> +
> +	if (mp == NULL)
> +		return NULL;
> +
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		ht = (struct rte_k32v64_hash_table *) te->data;
> +		if (strncmp(params->name, ht->name,
> +				RTE_K32V64_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	ht = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	ht = rte_zmalloc_socket(hash_name, mem_size,
> +		RTE_CACHE_LINE_SIZE, params->socket_id);
> +	if (ht == NULL) {
> +		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
> +		rte_free(te);
> +		rte_mempool_free(mp);
> +		goto exit;
> +	}
> +
> +	memcpy(ht->name, hash_name, sizeof(ht->name));
> +	ht->max_ent = max_ent;
> +	ht->bucket_msk = nb_buckets - 1;
> +	ht->ext_ent_pool = mp;
> +	ht->lookup = get_lookup_bulk_fn();
> +
> +	te->data = (void *)ht;
> +	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
> +
> +exit:
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ht;
> +}
> +
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht) {
> +	struct rte_tailq_entry *te;
> +	struct rte_k32v64_hash_list *k32v64_hash_list;
> +
> +	if (ht == NULL)
> +		return;
> +
> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
> +				rte_k32v64_hash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* find out tailq entry */
> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
> +		if (te->data == (void *) ht)
> +			break;
> +	}
> +
> +
> +	if (te == NULL) {
> +		rte_mcfg_tailq_write_unlock();
> +		return;
> +	}
> +
> +	TAILQ_REMOVE(k32v64_hash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	rte_mempool_free(ht->ext_ent_pool);
> +	rte_free(ht);
> +	rte_free(te);
> +}
> diff --git a/lib/librte_hash/rte_k32v64_hash.h
> b/lib/librte_hash/rte_k32v64_hash.h
> new file mode 100644
> index 0000000..b2c52e9
> --- /dev/null
> +++ b/lib/librte_hash/rte_k32v64_hash.h
> @@ -0,0 +1,211 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_K32V64_HASH_H_
> +#define _RTE_K32V64_HASH_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_compat.h>
> +#include <rte_atomic.h>
> +#include <rte_mempool.h>
> +
> +#define RTE_K32V64_HASH_NAMESIZE		32
> +#define RTE_K32V64_KEYS_PER_BUCKET		4
> +#define RTE_K32V64_WRITE_IN_PROGRESS		1
> +
> +struct rte_k32v64_hash_params {
> +	const char *name;
> +	uint32_t entries;
> +	int socket_id;
> +};
> +
> +struct rte_k32v64_ext_ent {
> +	SLIST_ENTRY(rte_k32v64_ext_ent) next;
> +	uint32_t	key;
> +	uint64_t	val;
> +};
> +
> +struct rte_k32v64_hash_bucket {
> +	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
> +	uint8_t		key_mask;
> +	rte_atomic32_t	cnt;
> +	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head; }
> +__rte_cache_aligned;
> +
> +struct rte_k32v64_hash_table;
> +
> +typedef int (*rte_k32v64_hash_bulk_lookup_t) (struct
> +rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
> +	uint64_t *values, unsigned int n);
> +
> +struct rte_k32v64_hash_table {
> +	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the
> hash. */
> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
> +	uint32_t	max_ent;	/**< Maximum number of entities */
> +	uint32_t	bucket_msk;
> +	struct rte_mempool	*ext_ent_pool;
> +	rte_k32v64_hash_bulk_lookup_t	lookup;
> +	__extension__ struct rte_k32v64_hash_bucket	t[0];
> +};
> +
> +typedef int (*rte_k32v64_cmp_fn_t)
> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
> +
> +static inline int
> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
> +	uint64_t *val)
> +{
> +	int i;
> +
> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
> +		if ((key == bucket->key[i]) &&
> +				(bucket->key_mask & (1 << i))) {
> +			*val = bucket->val[i];
This load can happen speculatively.
You have to use the guard variable and payload concept here for reader-writer concurrency.
On the writer:
store -> key
store -> val
store_release -> key_mask (or there will be a rte_smp_wmb before this) 'key_mask' acts as the guard variable

On the reader:
load_acquire -> key_mask (or there will be a rte_smp_rmb after this)
if statement etc.....
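I.e. roughly (sketch, only the guard/payload ordering is shown):

	uint8_t msk = __atomic_load_n(&bucket->key_mask, __ATOMIC_ACQUIRE);

	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
		if ((msk & (1 << i)) && (key == bucket->key[i])) {
			*val = bucket->val[i];
			return 1;
		}
	}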

> +			return 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int
> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f) {
> +	uint64_t	val = 0;
> +	struct rte_k32v64_ext_ent *ent;
> +	int32_t	cnt;
> +	int found = 0;
> +	uint32_t bucket = hash & table->bucket_msk;
> +
> +	do {
> +		do
> +			cnt = rte_atomic32_read(&table->t[bucket].cnt);
> +		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
The reader can hang here if the writer increments the counter and gets scheduled out. Using the method suggested above, you can remove this code.

> +
> +		found = cmp_f(&table->t[bucket], key, &val);
> +		if (unlikely((found == 0) &&
> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
> +				if (ent->key == key) {
> +					val = ent->val;
> +					found = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
With the logic mentioned in delete function, the counter needs to be read at the beginning of the loop. Suggest looking at rte_hash-cuckoo hash for the memory ordering required for the counter.

> +
> +	if (found == 1) {
> +		*value = val;
> +		return 0;
> +	} else
> +		return -ENOENT;
> +}
> +
> +static inline int
> +rte_k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t *value)
> +{
> +	return __k32v64_hash_lookup(table, key, hash, value,
> +__k32v64_cmp_keys); }
> +
> +static inline int
> +rte_k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table,
> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) {
> +	return table->lookup(table, keys, hashes, values, n); }
> +
> +/**
> + * Add a key to an existing hash table with hash value.
> + * This operation is not multi-thread safe
> + * and should only be called from one thread.
> + *
> + * @param ht
> + *   Hash table to add the key to.
> + * @param key
> + *   Key to add to the hash table.
> + * @param value
> + *   Value to associate with key.
> + * @param hash
> + *   Hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash, uint64_t value);
> +
> +/**
> + * Remove a key with a given hash value from an existing hash table.
> + * This operation is not multi-thread
> + * safe and should only be called from one thread.
> + *
> + * @param ht
> + *   Hash table to remove the key from.
> + * @param key
> + *   Key to remove from the hash table.
> + * @param hash
> + *   hash value associated with key.
> + * @return
> + *   0 if ok, or negative value on error.
> + */
> +__rte_experimental
> +int
> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
> +	uint32_t hash);
> +
> +
> +/**
> + * Performs a lookup for an existing hash table, and returns a pointer
> +to
> + * the table if found.
> + *
> + * @param name
> + *   Name of the hash table to find
> + *
> + * @return
> + *   pointer to hash table structure or NULL on error with rte_errno
> + *   set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_find_existing(const char *name);
> +
> +/**
> + * Create a new hash table for use with four byte keys.
> + *
> + * @param params
> + *   Parameters used in creation of hash table.
> + *
> + * @return
> + *   Pointer to hash table structure that is used in future hash table
> + *   operations, or NULL on error with rte_errno set appropriately.
> + */
> +__rte_experimental
> +struct rte_k32v64_hash_table *
> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params);
> +
> +/**
> + * Free all memory used by a hash table.
> + *
> + * @param table
> + *   Hash table to deallocate.
> + */
> +__rte_experimental
> +void
> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *table);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_K32V64_HASH_H_ */
> --
> 2.7.4
  
Vladimir Medvedkin May 8, 2020, 8:14 p.m. UTC | #5
Hi Konstantin,

Thanks for review,

On 23/04/2020 14:31, Ananyev, Konstantin wrote:
> Hi Vladimir,
>
> Apologies for late review.
> My comments below.
>
>> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
>> This table is hash function agnostic so user must provide
>> precalculated hash signature for add/delete/lookup operations.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_k32v64_hash.c
>> @@ -0,0 +1,315 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <rte_eal_memconfig.h>
>> +#include <rte_errno.h>
>> +#include <rte_malloc.h>
>> +#include <rte_memory.h>
>> +#include <rte_tailq.h>
>> +
>> +#include <rte_k32v64_hash.h>
>> +
>> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
>> +
>> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
>> +.name = "RTE_K32V64_HASH",
>> +};
>> +
>> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
>> +
>> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
>> +
>> +#ifdef CC_AVX512VL_SUPPORT
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
>> +uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
>> +#endif
>> +
>> +static int
>> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t *keys,
>> +uint32_t *hashes, uint64_t *values, unsigned int n)
>> +{
>> +int ret, cnt = 0;
>> +unsigned int i;
>> +
>> +if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +(values == NULL)))
>> +return -EINVAL;
>> +
>> +for (i = 0; i < n; i++) {
>> +ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
>> +&values[i]);
>> +if (ret == 0)
>> +cnt++;
>> +}
>> +return cnt;
>> +}
>> +
>> +static rte_k32v64_hash_bulk_lookup_t
>> +get_lookup_bulk_fn(void)
>> +{
>> +#ifdef CC_AVX512VL_SUPPORT
>> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
>> +return k32v64_hash_bulk_lookup_avx512vl;
>> +#endif
>> +return k32v64_hash_bulk_lookup;
>> +}
>> +
>> +int
>> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t value)
>> +{
>> +uint32_t bucket;
>> +int i, idx, ret;
>> +uint8_t msk;
>> +struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
> I think for add you also need to update bucket.cnt
> at the start/end of updates (as you do for del).


Agree. We cannot guarantee an atomic update of a 64-bit value on a 32-bit arch.
But I think it is better to make the transaction as small as possible, so I
update bucket.cnt not at the start/end but right before and after the key/value
rewrite.


>
>> +bucket = hash & table->bucket_msk;
>> +/* Search key in table. Update value if exists */
>> +for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +table->t[bucket].val[i] = value;
>> +return 0;
>> +}
>> +}
>> +
>> +if (!SLIST_EMPTY(&table->t[bucket].head)) {
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +if (ent->key == key) {
>> +ent->val = value;
>> +return 0;
>> +}
>> +}
>> +}
>> +
>> +msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
>> +if (msk) {
>> +idx = __builtin_ctz(msk);
>> +table->t[bucket].key[idx] = key;
>> +table->t[bucket].val[idx] = value;
>> +rte_smp_wmb();
>> +table->t[bucket].key_mask |= 1 << idx;
>> +table->nb_ent++;
>> +return 0;
>> +}
>> +
>> +ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
>> +if (ret < 0)
>> +return ret;
>> +
>> +SLIST_NEXT(ent, next) = NULL;
>> +ent->key = key;
>> +ent->val = value;
>> +rte_smp_wmb();
>> +SLIST_FOREACH(tmp, &table->t[bucket].head, next)
>> +prev = tmp;
>> +
>> +if (prev == NULL)
>> +SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
>> +else
>> +SLIST_INSERT_AFTER(prev, ent, next);
>> +
>> +table->nb_ent++;
>> +table->nb_ext_ent++;
>> +return 0;
>> +}
>> +
>> +int
>> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash)
>> +{
>> +uint32_t bucket;
>> +int i;
>> +struct rte_k32v64_ext_ent *ent;
>> +
>> +if (table == NULL)
>> +return -EINVAL;
>> +
>> +bucket = hash & table->bucket_msk;
>> +
>> +for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == table->t[bucket].key[i]) &&
>> +(table->t[bucket].key_mask & (1 << i))) {
>> +ent = SLIST_FIRST(&table->t[bucket].head);
>> +if (ent) {
>> +rte_atomic32_inc(&table->t[bucket].cnt);
> I know that right now rte_atomic32 uses _sync gcc builtins underneath,
> so it should be safe.
> But I think the proper way would be:
> table->t[bucket].cnt++;
> rte_smp_wmb();
> or as alternative probably use C11 atomic ACQUIRE/RELEASE


Agree.
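E.g. (sketch, assuming cnt is turned into a plain uint32_t and 'cnt' below is a local copy of the old counter value):

	/* write begin: the odd value must be visible before the bucket is touched */
	__atomic_store_n(&table->t[bucket].cnt, cnt + 1, __ATOMIC_RELAXED);
	__atomic_thread_fence(__ATOMIC_RELEASE);
	/* ... modify the bucket ... */
	/* write end: bucket changes must be visible before the even value */
	__atomic_store_n(&table->t[bucket].cnt, cnt + 2, __ATOMIC_RELEASE);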


>
>> +table->t[bucket].key[i] = ent->key;
>> +table->t[bucket].val[i] = ent->val;
>> +SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
>> +rte_atomic32_inc(&table->t[bucket].cnt);
>> +table->nb_ext_ent--;
>> +} else
>> +table->t[bucket].key_mask &= ~(1 << i);
> I think you protect that update with bucket.cnt.
> From my perspective - as a rule of thumb - any update to the bucket/list
> should be within that transaction-start/transaction-end.


I think it is possible to update key_mask with a C11 atomic store.


>> +if (ent)
>> +rte_mempool_put(table->ext_ent_pool, ent);
>> +table->nb_ent--;
>> +return 0;
>> +}
>> +}
>> +
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next)
>> +if (ent->key == key)
>> +break;
>> +
>> +if (ent == NULL)
>> +return -ENOENT;
>> +
>> +rte_atomic32_inc(&table->t[bucket].cnt);
>> +SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent, next);
>> +rte_atomic32_inc(&table->t[bucket].cnt);
>> +rte_mempool_put(table->ext_ent_pool, ent);
>> +
>> +table->nb_ext_ent--;
>> +table->nb_ent--;
>> +
>> +return 0;
>> +}
>> +
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_find_existing(const char *name)
>> +{
>> +struct rte_k32v64_hash_table *h = NULL;
>> +struct rte_tailq_entry *te;
>> +struct rte_k32v64_hash_list *k32v64_hash_list;
>> +
>> +k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +rte_k32v64_hash_list);
>> +
>> +rte_mcfg_tailq_read_lock();
>> +TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +h = (struct rte_k32v64_hash_table *) te->data;
>> +if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE) == 0)
>> +break;
>> +}
>> +rte_mcfg_tailq_read_unlock();
>> +if (te == NULL) {
>> +rte_errno = ENOENT;
>> +return NULL;
>> +}
>> +return h;
>> +}
>> +
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params)
>> +{
>> +char hash_name[RTE_K32V64_HASH_NAMESIZE];
>> +struct rte_k32v64_hash_table *ht = NULL;
>> +struct rte_tailq_entry *te;
>> +struct rte_k32v64_hash_list *k32v64_hash_list;
>> +uint32_t mem_size, nb_buckets, max_ent;
>> +int ret;
>> +struct rte_mempool *mp;
>> +
>> +if ((params == NULL) || (params->name == NULL) ||
>> +(params->entries == 0)) {
>> +rte_errno = EINVAL;
>> +return NULL;
>> +}
>> +
>> +k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +rte_k32v64_hash_list);
>> +
>> +ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params->name);
>> +if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
>> +rte_errno = ENAMETOOLONG;
>> +return NULL;
>> +}
>> +
>> +max_ent = rte_align32pow2(params->entries);
>> +nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
>> +mem_size = sizeof(struct rte_k32v64_hash_table) +
>> +sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
>> +
>> +mp = rte_mempool_create(hash_name, max_ent,
>> +sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
>> +params->socket_id, 0);
>> +
>> +if (mp == NULL)
>> +return NULL;
>> +
>> +rte_mcfg_tailq_write_lock();
>> +TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +ht = (struct rte_k32v64_hash_table *) te->data;
>> +if (strncmp(params->name, ht->name,
>> +RTE_K32V64_HASH_NAMESIZE) == 0)
>> +break;
>> +}
>> +ht = NULL;
>> +if (te != NULL) {
>> +rte_errno = EEXIST;
>> +rte_mempool_free(mp);
>> +goto exit;
>> +}
>> +
>> +te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
>> +if (te == NULL) {
>> +RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
>> +rte_mempool_free(mp);
>> +goto exit;
>> +}
>> +
>> +ht = rte_zmalloc_socket(hash_name, mem_size,
>> +RTE_CACHE_LINE_SIZE, params->socket_id);
>> +if (ht == NULL) {
>> +RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
>> +rte_free(te);
>> +rte_mempool_free(mp);
>> +goto exit;
>> +}
>> +
>> +memcpy(ht->name, hash_name, sizeof(ht->name));
>> +ht->max_ent = max_ent;
>> +ht->bucket_msk = nb_buckets - 1;
>> +ht->ext_ent_pool = mp;
>> +ht->lookup = get_lookup_bulk_fn();
>> +
>> +te->data = (void *)ht;
>> +TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
>> +
>> +exit:
>> +rte_mcfg_tailq_write_unlock();
>> +
>> +return ht;
>> +}
>> +
>> +void
>> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht)
>> +{
>> +struct rte_tailq_entry *te;
>> +struct rte_k32v64_hash_list *k32v64_hash_list;
>> +
>> +if (ht == NULL)
>> +return;
>> +
>> +k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +rte_k32v64_hash_list);
>> +
>> +rte_mcfg_tailq_write_lock();
>> +
>> +/* find out tailq entry */
>> +TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +if (te->data == (void *) ht)
>> +break;
>> +}
>> +
>> +
>> +if (te == NULL) {
>> +rte_mcfg_tailq_write_unlock();
>> +return;
>> +}
>> +
>> +TAILQ_REMOVE(k32v64_hash_list, te, next);
>> +
>> +rte_mcfg_tailq_write_unlock();
>> +
>> +rte_mempool_free(ht->ext_ent_pool);
>> +rte_free(ht);
>> +rte_free(te);
>> +}
>> diff --git a/lib/librte_hash/rte_k32v64_hash.h b/lib/librte_hash/rte_k32v64_hash.h
>> new file mode 100644
>> index 0000000..b2c52e9
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_k32v64_hash.h
>> @@ -0,0 +1,211 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#ifndef _RTE_K32V64_HASH_H_
>> +#define _RTE_K32V64_HASH_H_
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <rte_compat.h>
>> +#include <rte_atomic.h>
>> +#include <rte_mempool.h>
>> +
>> +#define RTE_K32V64_HASH_NAMESIZE	32
>> +#define RTE_K32V64_KEYS_PER_BUCKET	4
>> +#define RTE_K32V64_WRITE_IN_PROGRESS	1
>> +
>> +struct rte_k32v64_hash_params {
>> +const char *name;
>> +uint32_t entries;
>> +int socket_id;
>> +};
>> +
>> +struct rte_k32v64_ext_ent {
>> +SLIST_ENTRY(rte_k32v64_ext_ent) next;
>> +uint32_t	key;
>> +uint64_t	val;
>> +};
>> +
>> +struct rte_k32v64_hash_bucket {
>> +uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
>> +uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
>> +uint8_t	key_mask;
>> +rte_atomic32_t	cnt;
>> +SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head;
>> +} __rte_cache_aligned;
>> +
>> +struct rte_k32v64_hash_table;
>> +
>> +typedef int (*rte_k32v64_hash_bulk_lookup_t)
>> +(struct rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
>> +uint64_t *values, unsigned int n);
>> +
>> +struct rte_k32v64_hash_table {
>> +char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the hash. */
>> +uint32_t	nb_ent;		/**< Number of entities in the table*/
>> +uint32_t	nb_ext_ent;	/**< Number of extended entities */
>> +uint32_t	max_ent;	/**< Maximum number of entities */
>> +uint32_t	bucket_msk;
>> +struct rte_mempool	*ext_ent_pool;
>> +rte_k32v64_hash_bulk_lookup_t	lookup;
>> +__extension__ struct rte_k32v64_hash_bucket	t[0];
>> +};
>> +
>> +typedef int (*rte_k32v64_cmp_fn_t)
>> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
>> +
>> +static inline int
>> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
>> +uint64_t *val)
>> +{
>> +int i;
>> +
>> +for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +if ((key == bucket->key[i]) &&
>> +(bucket->key_mask & (1 << i))) {
>> +*val = bucket->val[i];
>> +return 1;
>> +}
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static inline int
>> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
>> +uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f)
>> +{
>> +uint64_t	val = 0;
>> +struct rte_k32v64_ext_ent *ent;
>> +int32_t	cnt;
>> +int found = 0;
>> +uint32_t bucket = hash & table->bucket_msk;
>> +
>> +do {
>> +do
>> +cnt = rte_atomic32_read(&table->t[bucket].cnt);
>> +while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
>> +
>> +found = cmp_f(&table->t[bucket], key, &val);
>> +if (unlikely((found == 0) &&
>> +(!SLIST_EMPTY(&table->t[bucket].head)))) {
>> +SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +if (ent->key == key) {
>> +val = ent->val;
>> +found = 1;
>> +break;
>> +}
>> +}
>> +}
>> +
>> +} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
> AFAIK atomic32_read is just a normal read op, so it can be reordered with other ops.
> So this construction doesn't protect you from races.
> What you probably need here:
>
> do {
> cnt1  =   table->t[bucket].cnt;
> rte_smp_rmb();
> ....
> rte_smp_rmb();
> cnt2 = table->t[bucket].cnt;
> } while (cnt1 != cnt2 || (cnt1 & RTE_K32V64_WRITE_IN_PROGRESS) != 0);


Agree, these reads could be reordered. I will replace them with C11 atomics.
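I.e. something like (sketch, again assuming cnt becomes a plain uint32_t):

	do {
		do
			cnt = __atomic_load_n(&table->t[bucket].cnt,
				__ATOMIC_ACQUIRE);
		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));

		found = cmp_f(&table->t[bucket], key, &val);
		/* ... extended list walk, as before ... */

		__atomic_thread_fence(__ATOMIC_ACQUIRE);
	} while (unlikely(cnt != __atomic_load_n(&table->t[bucket].cnt,
			__ATOMIC_RELAXED)));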


>
>> +
>> +if (found == 1) {
>> +*value = val;
>> +return 0;
>> +} else
>> +return -ENOENT;
>> +}
>> +
  
Vladimir Medvedkin May 8, 2020, 8:38 p.m. UTC | #6
Hi Honnappa,

Thanks for the comments.

On 29/04/2020 22:29, Honnappa Nagarahalli wrote:
> Hi Vladimir,
> 	I am not sure which way the decision is made, but here are a few comments inline. Please use C11 built-ins for synchronization.
>
>> -----Original Message-----
>> From: dev<dev-bounces@dpdk.org>  On Behalf Of Vladimir Medvedkin
>> Sent: Wednesday, April 15, 2020 1:17 PM
>> To:dev@dpdk.org
>> Cc:konstantin.ananyev@intel.com;yipeng1.wang@intel.com;
>> sameh.gobriel@intel.com;bruce.richardson@intel.com
>> Subject: [dpdk-dev] [PATCH v3 1/4] hash: add k32v64 hash library
>>
>> K32V64 hash is a hash table that supports 32 bit keys and 64 bit values.
>> This table is hash function agnostic so user must provide precalculated hash
>> signature for add/delete/lookup operations.
>>
>> Signed-off-by: Vladimir Medvedkin<vladimir.medvedkin@intel.com>
>> ---
>>   lib/Makefile                           |   2 +-
>>   lib/librte_hash/Makefile               |  13 +-
>>   lib/librte_hash/k32v64_hash_avx512vl.c |  56 ++++++
>>   lib/librte_hash/meson.build            |  17 +-
>>   lib/librte_hash/rte_hash_version.map   |   6 +-
>>   lib/librte_hash/rte_k32v64_hash.c      | 315
>> +++++++++++++++++++++++++++++++++
>>   lib/librte_hash/rte_k32v64_hash.h      | 211 ++++++++++++++++++++++
>>   7 files changed, 615 insertions(+), 5 deletions(-)  create mode 100644
>> lib/librte_hash/k32v64_hash_avx512vl.c
>>   create mode 100644 lib/librte_hash/rte_k32v64_hash.c  create mode
>> 100644 lib/librte_hash/rte_k32v64_hash.h
>>
>> diff --git a/lib/Makefile b/lib/Makefile index 46b91ae..a8c02e4 100644
>> --- a/lib/Makefile
>> +++ b/lib/Makefile
>> @@ -48,7 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
>> DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev
>> \
>>   			librte_net librte_hash librte_cryptodev
>>   DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash -DEPDIRS-librte_hash :=
>> librte_eal librte_ring
>> +DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
>>   DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd  DEPDIRS-librte_efd :=
>> librte_eal librte_ring librte_hash
>>   DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib diff --git
>> a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index
>> ec9f864..023689d 100644
>> --- a/lib/librte_hash/Makefile
>> +++ b/lib/librte_hash/Makefile
>> @@ -8,13 +8,14 @@ LIB = librte_hash.a
>>
>>   CFLAGS += -O3
>>   CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
>> -LDLIBS += -lrte_eal -lrte_ring
>> +LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
>>
>>   EXPORT_MAP := rte_hash_version.map
>>
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
>>   SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_k32v64_hash.c
>>
>>   # install this header file
>>   SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h @@ -27,5
>> +28,15 @@ endif  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> rte_jhash.h  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
>> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_k32v64_hash.h
>> +
>> +CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1
>> |
>> +\ grep -q __AVX512VL__ && echo 1)
>> +
>> +ifeq ($(CC_AVX512VL_SUPPORT), 1)
>> +	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
>> +	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
>> +	CFLAGS_rte_k32v64_hash.o += -DCC_AVX512VL_SUPPORT endif
>>
>>   include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c
>> b/lib/librte_hash/k32v64_hash_avx512vl.c
>> new file mode 100644
>> index 0000000..7c70dd2
>> --- /dev/null
>> +++ b/lib/librte_hash/k32v64_hash_avx512vl.c
>> @@ -0,0 +1,56 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <rte_k32v64_hash.h>
>> +
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
>> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
>> +
>> +static inline int
>> +k32v64_cmp_keys_avx512vl(struct rte_k32v64_hash_bucket *bucket,
>> uint32_t key,
>> +	uint64_t *val)
>> +{
>> +	__m128i keys, srch_key;
>> +	__mmask8 msk;
>> +
>> +	keys = _mm_load_si128((void *)bucket);
>> +	srch_key = _mm_set1_epi32(key);
>> +
>> +	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys,
>> srch_key);
>> +	if (msk) {
>> +		*val = bucket->val[__builtin_ctz(msk)];
>> +		return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static inline int
>> +k32v64_hash_lookup_avx512vl(struct rte_k32v64_hash_table *table,
>> uint32_t key,
>> +	uint32_t hash, uint64_t *value)
>> +{
>> +	return __k32v64_hash_lookup(table, key, hash, value,
>> +		k32v64_cmp_keys_avx512vl);
>> +}
>> +
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
>> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) {
>> +	int ret, cnt = 0;
>> +	unsigned int i;
>> +
>> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +			(values == NULL)))
>> +		return -EINVAL;
>> +
>> +	for (i = 0; i < n; i++) {
>> +		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
>> +			&values[i]);
>> +		if (ret == 0)
>> +			cnt++;
>> +	}
>> +	return cnt;
>> +}
>> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
>> 6ab46ae..8a37cc4 100644
>> --- a/lib/librte_hash/meson.build
>> +++ b/lib/librte_hash/meson.build
>> @@ -3,10 +3,23 @@
>>
>>   headers = files('rte_crc_arm64.h',
>>   	'rte_fbk_hash.h',
>> +	'rte_k32v64_hash.h',
>>   	'rte_hash_crc.h',
>>   	'rte_hash.h',
>>   	'rte_jhash.h',
>>   	'rte_thash.h')
>>
>> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c') -deps += ['ring']
>> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c',
>> +'rte_k32v64_hash.c') deps += ['ring', 'mempool']
>> +
>> +if dpdk_conf.has('RTE_ARCH_X86')
>> +	if cc.has_argument('-mavx512vl')
>> +		avx512_tmplib = static_library('avx512_tmp',
>> +                                'k32v64_hash_avx512vl.c',
>> +                                dependencies: static_rte_mempool,
>> +                                c_args: cflags + ['-mavx512vl'])
>> +                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
>> +                cflags += '-DCC_AVX512VL_SUPPORT'
>> +
>> +	endif
>> +endif
>> diff --git a/lib/librte_hash/rte_hash_version.map
>> b/lib/librte_hash/rte_hash_version.map
>> index a8fbbc3..9a4f2f6 100644
>> --- a/lib/librte_hash/rte_hash_version.map
>> +++ b/lib/librte_hash/rte_hash_version.map
>> @@ -34,5 +34,9 @@ EXPERIMENTAL {
>>
>>   	rte_hash_free_key_with_position;
>>   	rte_hash_max_key_id;
>> -
>> +	rte_k32v64_hash_create;
>> +	rte_k32v64_hash_find_existing;
>> +	rte_k32v64_hash_free;
>> +	rte_k32v64_hash_add;
>> +	rte_k32v64_hash_delete;
>>   };
>> diff --git a/lib/librte_hash/rte_k32v64_hash.c
>> b/lib/librte_hash/rte_k32v64_hash.c
>> new file mode 100644
>> index 0000000..7ed94b4
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_k32v64_hash.c
>> @@ -0,0 +1,315 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <rte_eal_memconfig.h>
>> +#include <rte_errno.h>
>> +#include <rte_malloc.h>
>> +#include <rte_memory.h>
>> +#include <rte_tailq.h>
>> +
>> +#include <rte_k32v64_hash.h>
>> +
>> +TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
>> +
>> +static struct rte_tailq_elem rte_k32v64_hash_tailq = {
>> +	.name = "RTE_K32V64_HASH",
>> +};
>> +
>> +EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
>> +
>> +#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
>> +
>> +#ifdef CC_AVX512VL_SUPPORT
>> +int
>> +k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
>> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
>> +#endif
>> +
>> +static int
>> +k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t
>> *keys,
>> +	uint32_t *hashes, uint64_t *values, unsigned int n) {
>> +	int ret, cnt = 0;
>> +	unsigned int i;
>> +
>> +	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
>> +			(values == NULL)))
>> +		return -EINVAL;
>> +
>> +	for (i = 0; i < n; i++) {
>> +		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
>> +			&values[i]);
>> +		if (ret == 0)
>> +			cnt++;
>> +	}
>> +	return cnt;
>> +}
>> +
>> +static rte_k32v64_hash_bulk_lookup_t
>> +get_lookup_bulk_fn(void)
>> +{
>> +#ifdef CC_AVX512VL_SUPPORT
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
>> +		return k32v64_hash_bulk_lookup_avx512vl; #endif
>> +	return k32v64_hash_bulk_lookup;
>> +}
>> +
>> +int
>> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash, uint64_t value)
>> +{
>> +	uint32_t bucket;
>> +	int i, idx, ret;
>> +	uint8_t msk;
>> +	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
>> +
>> +	if (table == NULL)
>> +		return -EINVAL;
>> +
>> +	bucket = hash & table->bucket_msk;
>> +	/* Search key in table. Update value if exists */
>> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +		if ((key == table->t[bucket].key[i]) &&
>> +				(table->t[bucket].key_mask & (1 << i))) {
>> +			table->t[bucket].val[i] = value;
> The old value of val[i] might be getting used in the reader. It needs to be returned to the caller so that it can be put on the RCU defer queue to free later.
> You might also want to use an atomic store to ensure the stores are not split.


Agree, I will change the API.


>> +			return 0;
>> +		}
>> +	}
>> +
>> +	if (!SLIST_EMPTY(&table->t[bucket].head)) {
>> +		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +			if (ent->key == key) {
>> +				ent->val = value;
> Same here, need to return the old value.
>
>> +				return 0;
>> +			}
>> +		}
>> +	}
>> +
>> +	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
>> +	if (msk) {
>> +		idx = __builtin_ctz(msk);
>> +		table->t[bucket].key[idx] = key;
>> +		table->t[bucket].val[idx] = value;
>> +		rte_smp_wmb();
>> +		table->t[bucket].key_mask |= 1 << idx;
> The barrier and the store can be replaced with a store-release in C11.


Agree

>
>> +		table->nb_ent++;
>> +		return 0;
>> +	}
>> +
>> +	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	SLIST_NEXT(ent, next) = NULL;
>> +	ent->key = key;
>> +	ent->val = value;
>> +	rte_smp_wmb();
>> +	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
>> +		prev = tmp;
>> +
>> +	if (prev == NULL)
>> +		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
>> +	else
>> +		SLIST_INSERT_AFTER(prev, ent, next);
> Need C11 SLIST


Please clarify


>> +
>> +	table->nb_ent++;
>> +	table->nb_ext_ent++;
>> +	return 0;
>> +}
>> +
>> +int
>> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash)
> This should return the value corresponding to the deleted key


Agree.


>
>> +{
>> +	uint32_t bucket;
>> +	int i;
>> +	struct rte_k32v64_ext_ent *ent;
>> +
>> +	if (table == NULL)
>> +		return -EINVAL;
>> +
>> +	bucket = hash & table->bucket_msk;
>> +
>> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +		if ((key == table->t[bucket].key[i]) &&
>> +				(table->t[bucket].key_mask & (1 << i))) {
>> +			ent = SLIST_FIRST(&table->t[bucket].head);
>> +			if (ent) {
>> +				rte_atomic32_inc(&table->t[bucket].cnt);
>> +				table->t[bucket].key[i] = ent->key;
> In this case, both key and value are changing and it is not atomic. There is a possibility that the lookup function will receive incorrect value of 'val[i]'. Suggest following the method described below.
>
>> +				table->t[bucket].val[i] = ent->val;
>> +				SLIST_REMOVE_HEAD(&table->t[bucket].head,
>> next);
>> +				rte_atomic32_inc(&table->t[bucket].cnt);
>> +				table->nb_ext_ent--;
>> +			} else
> Suggest changing this into a 2 step process:


Thanks, it is a good idea, but


> 1) Delete the entry from the fixed bucket (update key_mask)
> 2) Move the head of extended bucket to the fixed bucket
> 	2a) Insert the key/value from the head into the deleted index and update the key_mask (this will ensure that the reader has the entry available while it is being removed from the extended bucket)


With the reader logic you suggested for __k32v64_cmp_keys() there could be a
possible race:
reader             writer

read bucket.cnt
load_acquire -> key_mask
if (key == key[i]) <- true
----interrupt----------
						1) Delete the entry from the fixed bucket (update key_mask)
						2a)
						--------interrupt-----
---wake------
get updated wrong value
cmp bucket.cnt
return with success but with invalid value

Please correct me if I'm wrong.


> 	2b) increment the counter indicating the extended bucket is changing
> 	2c) remove the head
> 	2d) increment the counter indicating the extended bucket change is done
>
> The above procedure will result in removing the spinning (resulting from step 2b) in the lookup function (comment is added in the lookup function), readers will not be blocked if the writer is scheduled out.


As far as I can see we must somehow indicate a write_in_progress state for
a bucket, at least because we cannot guarantee an atomic write of a
uint64_t on a 32-bit arch.


> This logic is implemented in rte_hash using cuckoo hash, suggest to take a look for required memory orderings.
>
>> +				table->t[bucket].key_mask &= ~(1 << i);
>> +			if (ent)
>> +				rte_mempool_put(table->ext_ent_pool, ent);
> Entry cannot be put to free list immediately as the readers are still using it.


With the current transaction logic, if a reader is using ent while the writer
puts it back in the pool, I don't see any problem, because the reader will
detect the bucket.cnt update and rerun the search.


>
>> +			table->nb_ent--;
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	SLIST_FOREACH(ent, &table->t[bucket].head, next)
>> +		if (ent->key == key)
>> +			break;
>> +
>> +	if (ent == NULL)
>> +		return -ENOENT;
>> +
>> +	rte_atomic32_inc(&table->t[bucket].cnt);
>> +	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent,
>> next);
>> +	rte_atomic32_inc(&table->t[bucket].cnt);
>> +	rte_mempool_put(table->ext_ent_pool, ent);
> Entry might be getting used by the readers still.
>
>> +
>> +	table->nb_ext_ent--;
>> +	table->nb_ent--;
>> +
>> +	return 0;
>> +}
>> +
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_find_existing(const char *name) {
>> +	struct rte_k32v64_hash_table *h = NULL;
>> +	struct rte_tailq_entry *te;
>> +	struct rte_k32v64_hash_list *k32v64_hash_list;
>> +
>> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +			rte_k32v64_hash_list);
>> +
>> +	rte_mcfg_tailq_read_lock();
>> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +		h = (struct rte_k32v64_hash_table *) te->data;
>> +		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE)
>> == 0)
>> +			break;
>> +	}
>> +	rte_mcfg_tailq_read_unlock();
>> +	if (te == NULL) {
>> +		rte_errno = ENOENT;
>> +		return NULL;
>> +	}
>> +	return h;
>> +}
>> +
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params) {
>> +	char hash_name[RTE_K32V64_HASH_NAMESIZE];
>> +	struct rte_k32v64_hash_table *ht = NULL;
>> +	struct rte_tailq_entry *te;
>> +	struct rte_k32v64_hash_list *k32v64_hash_list;
>> +	uint32_t mem_size, nb_buckets, max_ent;
>> +	int ret;
>> +	struct rte_mempool *mp;
>> +
>> +	if ((params == NULL) || (params->name == NULL) ||
>> +			(params->entries == 0)) {
>> +		rte_errno = EINVAL;
>> +		return NULL;
>> +	}
>> +
>> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +		rte_k32v64_hash_list);
>> +
>> +	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params-
>>> name);
>> +	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
>> +		rte_errno = ENAMETOOLONG;
>> +		return NULL;
>> +	}
>> +
>> +	max_ent = rte_align32pow2(params->entries);
>> +	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
>> +	mem_size = sizeof(struct rte_k32v64_hash_table) +
>> +		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
>> +
>> +	mp = rte_mempool_create(hash_name, max_ent,
>> +		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL,
>> NULL,
>> +		params->socket_id, 0);
>> +
>> +	if (mp == NULL)
>> +		return NULL;
>> +
>> +	rte_mcfg_tailq_write_lock();
>> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +		ht = (struct rte_k32v64_hash_table *) te->data;
>> +		if (strncmp(params->name, ht->name,
>> +				RTE_K32V64_HASH_NAMESIZE) == 0)
>> +			break;
>> +	}
>> +	ht = NULL;
>> +	if (te != NULL) {
>> +		rte_errno = EEXIST;
>> +		rte_mempool_free(mp);
>> +		goto exit;
>> +	}
>> +
>> +	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
>> +	if (te == NULL) {
>> +		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
>> +		rte_mempool_free(mp);
>> +		goto exit;
>> +	}
>> +
>> +	ht = rte_zmalloc_socket(hash_name, mem_size,
>> +		RTE_CACHE_LINE_SIZE, params->socket_id);
>> +	if (ht == NULL) {
>> +		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
>> +		rte_free(te);
>> +		rte_mempool_free(mp);
>> +		goto exit;
>> +	}
>> +
>> +	memcpy(ht->name, hash_name, sizeof(ht->name));
>> +	ht->max_ent = max_ent;
>> +	ht->bucket_msk = nb_buckets - 1;
>> +	ht->ext_ent_pool = mp;
>> +	ht->lookup = get_lookup_bulk_fn();
>> +
>> +	te->data = (void *)ht;
>> +	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
>> +
>> +exit:
>> +	rte_mcfg_tailq_write_unlock();
>> +
>> +	return ht;
>> +}
>> +
>> +void
>> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht) {
>> +	struct rte_tailq_entry *te;
>> +	struct rte_k32v64_hash_list *k32v64_hash_list;
>> +
>> +	if (ht == NULL)
>> +		return;
>> +
>> +	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
>> +				rte_k32v64_hash_list);
>> +
>> +	rte_mcfg_tailq_write_lock();
>> +
>> +	/* find out tailq entry */
>> +	TAILQ_FOREACH(te, k32v64_hash_list, next) {
>> +		if (te->data == (void *) ht)
>> +			break;
>> +	}
>> +
>> +
>> +	if (te == NULL) {
>> +		rte_mcfg_tailq_write_unlock();
>> +		return;
>> +	}
>> +
>> +	TAILQ_REMOVE(k32v64_hash_list, te, next);
>> +
>> +	rte_mcfg_tailq_write_unlock();
>> +
>> +	rte_mempool_free(ht->ext_ent_pool);
>> +	rte_free(ht);
>> +	rte_free(te);
>> +}
>> diff --git a/lib/librte_hash/rte_k32v64_hash.h
>> b/lib/librte_hash/rte_k32v64_hash.h
>> new file mode 100644
>> index 0000000..b2c52e9
>> --- /dev/null
>> +++ b/lib/librte_hash/rte_k32v64_hash.h
>> @@ -0,0 +1,211 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Intel Corporation
>> + */
>> +
>> +#ifndef _RTE_K32V64_HASH_H_
>> +#define _RTE_K32V64_HASH_H_
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <rte_compat.h>
>> +#include <rte_atomic.h>
>> +#include <rte_mempool.h>
>> +
>> +#define RTE_K32V64_HASH_NAMESIZE		32
>> +#define RTE_K32V64_KEYS_PER_BUCKET		4
>> +#define RTE_K32V64_WRITE_IN_PROGRESS		1
>> +
>> +struct rte_k32v64_hash_params {
>> +	const char *name;
>> +	uint32_t entries;
>> +	int socket_id;
>> +};
>> +
>> +struct rte_k32v64_ext_ent {
>> +	SLIST_ENTRY(rte_k32v64_ext_ent) next;
>> +	uint32_t	key;
>> +	uint64_t	val;
>> +};
>> +
>> +struct rte_k32v64_hash_bucket {
>> +	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
>> +	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
>> +	uint8_t		key_mask;
>> +	rte_atomic32_t	cnt;
>> +	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head; }
>> +__rte_cache_aligned;
>> +
>> +struct rte_k32v64_hash_table;
>> +
>> +typedef int (*rte_k32v64_hash_bulk_lookup_t) (struct
>> +rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
>> +	uint64_t *values, unsigned int n);
>> +
>> +struct rte_k32v64_hash_table {
>> +	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the
>> hash. */
>> +	uint32_t	nb_ent;		/**< Number of entities in the table*/
>> +	uint32_t	nb_ext_ent;	/**< Number of extended entities */
>> +	uint32_t	max_ent;	/**< Maximum number of entities */
>> +	uint32_t	bucket_msk;
>> +	struct rte_mempool	*ext_ent_pool;
>> +	rte_k32v64_hash_bulk_lookup_t	lookup;
>> +	__extension__ struct rte_k32v64_hash_bucket	t[0];
>> +};
>> +
>> +typedef int (*rte_k32v64_cmp_fn_t)
>> +(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
>> +
>> +static inline int
>> +__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
>> +	uint64_t *val)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
>> +		if ((key == bucket->key[i]) &&
>> +				(bucket->key_mask & (1 << i))) {
>> +			*val = bucket->val[i];
> This load can happen speculatively.
> You have to use the guard variable and payload concept here for reader-writer concurrency.
> On the writer:
> store -> key
> store -> val
> store_release -> key_mask (or there will be a rte_smp_wmb before this)
> 'key_mask' acts as the guard variable
>
> On the reader:
> load_acquire -> key_mask (or there will be a rte_smp_rmb after this)
> if statement etc.
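> For illustration, a minimal sketch of that ordering using the GCC __atomic
> builtins (the exact API is up to you; 'idx' and 'i' as in the patch):
>
> /* writer (single writer assumed) */
> bucket->key[idx] = key;
> bucket->val[idx] = value;
> /* release: key/val must be visible before the guard bit is set */
> __atomic_store_n(&bucket->key_mask, bucket->key_mask | (1 << idx),
> 	__ATOMIC_RELEASE);
>
> /* reader */
> uint8_t msk = __atomic_load_n(&bucket->key_mask, __ATOMIC_ACQUIRE);
> if ((msk & (1 << i)) && (bucket->key[i] == key))
> 	*val = bucket->val[i];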
>
>> +			return 1;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static inline int
>> +__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f) {
>> +	uint64_t	val = 0;
>> +	struct rte_k32v64_ext_ent *ent;
>> +	int32_t	cnt;
>> +	int found = 0;
>> +	uint32_t bucket = hash & table->bucket_msk;
>> +
>> +	do {
>> +		do
>> +			cnt = rte_atomic32_read(&table->t[bucket].cnt);
>> +		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
> The reader can hang here if the writer increments the counter and gets scheduled out. Using the method suggested above, you can remove this code.


Agree, but we tried to make the transaction's critical section as small as
we can, so the probability is very low.


>
>> +
>> +		found = cmp_f(&table->t[bucket], key, &val);
>> +		if (unlikely((found == 0) &&
>> +				(!SLIST_EMPTY(&table->t[bucket].head)))) {
>> +			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
>> +				if (ent->key == key) {
>> +					val = ent->val;
>> +					found = 1;
>> +					break;
>> +				}
>> +			}
>> +		}
>> +
>> +	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
> With the logic mentioned in the delete function, the counter needs to be read at the beginning of the loop. Suggest looking at rte_hash (cuckoo hash) for the memory ordering required for the counter.
>
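> Roughly (a sketch only, not the exact cuckoo hash code; fields as in
> __k32v64_hash_lookup() above):
>
> do {
> 	/* snapshot the counter before any bucket data is read */
> 	cnt = rte_atomic32_read(&table->t[bucket].cnt);
> 	rte_smp_rmb();
>
> 	found = cmp_f(&table->t[bucket], key, &val);
> 	/* ... extension list walk as before ... */
>
> 	/* order the data loads before re-checking the counter */
> 	rte_smp_rmb();
> } while (unlikely((cnt & RTE_K32V64_WRITE_IN_PROGRESS) ||
> 		(cnt != rte_atomic32_read(&table->t[bucket].cnt))));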
>> +
>> +	if (found == 1) {
>> +		*value = val;
>> +		return 0;
>> +	} else
>> +		return -ENOENT;
>> +}
>> +
>> +static inline int
>> +rte_k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash, uint64_t *value)
>> +{
>> +	return __k32v64_hash_lookup(table, key, hash, value,
>> +__k32v64_cmp_keys); }
>> +
>> +static inline int
>> +rte_k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table,
>> +	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n) {
>> +	return table->lookup(table, keys, hashes, values, n); }
>> +
>> +/**
>> + * Add a key to an existing hash table with hash value.
>> + * This operation is not multi-thread safe
>> + * and should only be called from one thread.
>> + *
>> + * @param ht
>> + *   Hash table to add the key to.
>> + * @param key
>> + *   Key to add to the hash table.
>> + * @param value
>> + *   Value to associate with key.
>> + * @param hash
>> + *   Hash value associated with key.
>> + * @return
>> + *   0 if ok, or negative value on error.
>> + */
>> +__rte_experimental
>> +int
>> +rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash, uint64_t value);
>> +
>> +/**
>> + * Remove a key with a given hash value from an existing hash table.
>> + * This operation is not multi-thread
>> + * safe and should only be called from one thread.
>> + *
>> + * @param ht
>> + *   Hash table to remove the key from.
>> + * @param key
>> + *   Key to remove from the hash table.
>> + * @param hash
>> + *   hash value associated with key.
>> + * @return
>> + *   0 if ok, or negative value on error.
>> + */
>> +__rte_experimental
>> +int
>> +rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
>> +	uint32_t hash);
>> +
>> +
>> +/**
>> + * Performs a lookup for an existing hash table, and returns a pointer
>> +to
>> + * the table if found.
>> + *
>> + * @param name
>> + *   Name of the hash table to find
>> + *
>> + * @return
>> + *   pointer to hash table structure or NULL on error with rte_errno
>> + *   set appropriately.
>> + */
>> +__rte_experimental
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_find_existing(const char *name);
>> +
>> +/**
>> + * Create a new hash table for use with four byte keys.
>> + *
>> + * @param params
>> + *   Parameters used in creation of hash table.
>> + *
>> + * @return
>> + *   Pointer to hash table structure that is used in future hash table
>> + *   operations, or NULL on error with rte_errno set appropriately.
>> + */
>> +__rte_experimental
>> +struct rte_k32v64_hash_table *
>> +rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params);
>> +
>> +/**
>> + * Free all memory used by a hash table.
>> + *
>> + * @param table
>> + *   Hash table to deallocate.
>> + */
>> +__rte_experimental
>> +void
>> +rte_k32v64_hash_free(struct rte_k32v64_hash_table *table);
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _RTE_K32V64_HASH_H_ */
>> --
>> 2.7.4
  

Patch

diff --git a/lib/Makefile b/lib/Makefile
index 46b91ae..a8c02e4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,7 +48,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ethdev \
 			librte_net librte_hash librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
-DEPDIRS-librte_hash := librte_eal librte_ring
+DEPDIRS-librte_hash := librte_eal librte_ring librte_mempool
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index ec9f864..023689d 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -8,13 +8,14 @@  LIB = librte_hash.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
-LDLIBS += -lrte_eal -lrte_ring
+LDLIBS += -lrte_eal -lrte_ring -lrte_mempool
 
 EXPORT_MAP := rte_hash_version.map
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_k32v64_hash.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
@@ -27,5 +28,15 @@  endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_k32v64_hash.h
+
+CC_AVX512VL_SUPPORT=$(shell $(CC) -mavx512vl -dM -E - </dev/null 2>&1 | \
+grep -q __AVX512VL__ && echo 1)
+
+ifeq ($(CC_AVX512VL_SUPPORT), 1)
+	SRCS-$(CONFIG_RTE_LIBRTE_HASH) += k32v64_hash_avx512vl.c
+	CFLAGS_k32v64_hash_avx512vl.o += -mavx512vl
+	CFLAGS_rte_k32v64_hash.o += -DCC_AVX512VL_SUPPORT
+endif
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_hash/k32v64_hash_avx512vl.c b/lib/librte_hash/k32v64_hash_avx512vl.c
new file mode 100644
index 0000000..7c70dd2
--- /dev/null
+++ b/lib/librte_hash/k32v64_hash_avx512vl.c
@@ -0,0 +1,56 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <rte_k32v64_hash.h>
+
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
+	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
+
+static inline int
+k32v64_cmp_keys_avx512vl(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
+	uint64_t *val)
+{
+	__m128i keys, srch_key;
+	__mmask8 msk;
+
+	keys = _mm_load_si128((void *)bucket);
+	srch_key = _mm_set1_epi32(key);
+
+	msk = _mm_mask_cmpeq_epi32_mask(bucket->key_mask, keys, srch_key);
+	if (msk) {
+		*val = bucket->val[__builtin_ctz(msk)];
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline int
+k32v64_hash_lookup_avx512vl(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value)
+{
+	return __k32v64_hash_lookup(table, key, hash, value,
+		k32v64_cmp_keys_avx512vl);
+}
+
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
+	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n)
+{
+	int ret, cnt = 0;
+	unsigned int i;
+
+	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
+			(values == NULL)))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		ret = k32v64_hash_lookup_avx512vl(table, keys[i], hashes[i],
+			&values[i]);
+		if (ret == 0)
+			cnt++;
+	}
+	return cnt;
+}
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 6ab46ae..8a37cc4 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -3,10 +3,23 @@ 
 
 headers = files('rte_crc_arm64.h',
 	'rte_fbk_hash.h',
+	'rte_k32v64_hash.h',
 	'rte_hash_crc.h',
 	'rte_hash.h',
 	'rte_jhash.h',
 	'rte_thash.h')
 
-sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
-deps += ['ring']
+sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_k32v64_hash.c')
+deps += ['ring', 'mempool']
+
+if dpdk_conf.has('RTE_ARCH_X86')
+	if cc.has_argument('-mavx512vl')
+		avx512_tmplib = static_library('avx512_tmp',
+                                'k32v64_hash_avx512vl.c',
+                                dependencies: static_rte_mempool,
+                                c_args: cflags + ['-mavx512vl'])
+                objs += avx512_tmplib.extract_objects('k32v64_hash_avx512vl.c')
+                cflags += '-DCC_AVX512VL_SUPPORT'
+
+	endif
+endif
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index a8fbbc3..9a4f2f6 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -34,5 +34,9 @@  EXPERIMENTAL {
 
 	rte_hash_free_key_with_position;
 	rte_hash_max_key_id;
-
+	rte_k32v64_hash_create;
+	rte_k32v64_hash_find_existing;
+	rte_k32v64_hash_free;
+	rte_k32v64_hash_add;
+	rte_k32v64_hash_delete;
 };
diff --git a/lib/librte_hash/rte_k32v64_hash.c b/lib/librte_hash/rte_k32v64_hash.c
new file mode 100644
index 0000000..7ed94b4
--- /dev/null
+++ b/lib/librte_hash/rte_k32v64_hash.c
@@ -0,0 +1,315 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+
+#include <rte_k32v64_hash.h>
+
+TAILQ_HEAD(rte_k32v64_hash_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_k32v64_hash_tailq = {
+	.name = "RTE_K32V64_HASH",
+};
+
+EAL_REGISTER_TAILQ(rte_k32v64_hash_tailq);
+
+#define VALID_KEY_MSK           ((1 << RTE_K32V64_KEYS_PER_BUCKET) - 1)
+
+#ifdef CC_AVX512VL_SUPPORT
+int
+k32v64_hash_bulk_lookup_avx512vl(struct rte_k32v64_hash_table *table,
+	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n);
+#endif
+
+static int
+k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table, uint32_t *keys,
+	uint32_t *hashes, uint64_t *values, unsigned int n)
+{
+	int ret, cnt = 0;
+	unsigned int i;
+
+	if (unlikely((table == NULL) || (keys == NULL) || (hashes == NULL) ||
+			(values == NULL)))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		ret = rte_k32v64_hash_lookup(table, keys[i], hashes[i],
+			&values[i]);
+		if (ret == 0)
+			cnt++;
+	}
+	return cnt;
+}
+
+static rte_k32v64_hash_bulk_lookup_t
+get_lookup_bulk_fn(void)
+{
+#ifdef CC_AVX512VL_SUPPORT
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+		return k32v64_hash_bulk_lookup_avx512vl;
+#endif
+	return k32v64_hash_bulk_lookup;
+}
+
+int
+rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t value)
+{
+	uint32_t bucket;
+	int i, idx, ret;
+	uint8_t msk;
+	struct rte_k32v64_ext_ent *tmp, *ent, *prev = NULL;
+
+	if (table == NULL)
+		return -EINVAL;
+
+	bucket = hash & table->bucket_msk;
+	/* Search key in table. Update value if exists */
+	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == table->t[bucket].key[i]) &&
+				(table->t[bucket].key_mask & (1 << i))) {
+			table->t[bucket].val[i] = value;
+			return 0;
+		}
+	}
+
+	if (!SLIST_EMPTY(&table->t[bucket].head)) {
+		SLIST_FOREACH(ent, &table->t[bucket].head, next) {
+			if (ent->key == key) {
+				ent->val = value;
+				return 0;
+			}
+		}
+	}
+
+	msk = ~table->t[bucket].key_mask & VALID_KEY_MSK;
+	if (msk) {
+		idx = __builtin_ctz(msk);
+		table->t[bucket].key[idx] = key;
+		table->t[bucket].val[idx] = value;
+		rte_smp_wmb();
+		table->t[bucket].key_mask |= 1 << idx;
+		table->nb_ent++;
+		return 0;
+	}
+
+	ret = rte_mempool_get(table->ext_ent_pool, (void **)&ent);
+	if (ret < 0)
+		return ret;
+
+	SLIST_NEXT(ent, next) = NULL;
+	ent->key = key;
+	ent->val = value;
+	rte_smp_wmb();
+	SLIST_FOREACH(tmp, &table->t[bucket].head, next)
+		prev = tmp;
+
+	if (prev == NULL)
+		SLIST_INSERT_HEAD(&table->t[bucket].head, ent, next);
+	else
+		SLIST_INSERT_AFTER(prev, ent, next);
+
+	table->nb_ent++;
+	table->nb_ext_ent++;
+	return 0;
+}
+
+int
+rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash)
+{
+	uint32_t bucket;
+	int i;
+	struct rte_k32v64_ext_ent *ent;
+
+	if (table == NULL)
+		return -EINVAL;
+
+	bucket = hash & table->bucket_msk;
+
+	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == table->t[bucket].key[i]) &&
+				(table->t[bucket].key_mask & (1 << i))) {
+			ent = SLIST_FIRST(&table->t[bucket].head);
+			if (ent) {
+				rte_atomic32_inc(&table->t[bucket].cnt);
+				table->t[bucket].key[i] = ent->key;
+				table->t[bucket].val[i] = ent->val;
+				SLIST_REMOVE_HEAD(&table->t[bucket].head, next);
+				rte_atomic32_inc(&table->t[bucket].cnt);
+				table->nb_ext_ent--;
+			} else
+				table->t[bucket].key_mask &= ~(1 << i);
+			if (ent)
+				rte_mempool_put(table->ext_ent_pool, ent);
+			table->nb_ent--;
+			return 0;
+		}
+	}
+
+	SLIST_FOREACH(ent, &table->t[bucket].head, next)
+		if (ent->key == key)
+			break;
+
+	if (ent == NULL)
+		return -ENOENT;
+
+	rte_atomic32_inc(&table->t[bucket].cnt);
+	SLIST_REMOVE(&table->t[bucket].head, ent, rte_k32v64_ext_ent, next);
+	rte_atomic32_inc(&table->t[bucket].cnt);
+	rte_mempool_put(table->ext_ent_pool, ent);
+
+	table->nb_ext_ent--;
+	table->nb_ent--;
+
+	return 0;
+}
+
+struct rte_k32v64_hash_table *
+rte_k32v64_hash_find_existing(const char *name)
+{
+	struct rte_k32v64_hash_table *h = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_k32v64_hash_list *k32v64_hash_list;
+
+	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
+			rte_k32v64_hash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, k32v64_hash_list, next) {
+		h = (struct rte_k32v64_hash_table *) te->data;
+		if (strncmp(name, h->name, RTE_K32V64_HASH_NAMESIZE) == 0)
+			break;
+	}
+	rte_mcfg_tailq_read_unlock();
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+	return h;
+}
+
+struct rte_k32v64_hash_table *
+rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params)
+{
+	char hash_name[RTE_K32V64_HASH_NAMESIZE];
+	struct rte_k32v64_hash_table *ht = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_k32v64_hash_list *k32v64_hash_list;
+	uint32_t mem_size, nb_buckets, max_ent;
+	int ret;
+	struct rte_mempool *mp;
+
+	if ((params == NULL) || (params->name == NULL) ||
+			(params->entries == 0)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
+		rte_k32v64_hash_list);
+
+	ret = snprintf(hash_name, sizeof(hash_name), "K32V64_%s", params->name);
+	if (ret < 0 || ret >= RTE_K32V64_HASH_NAMESIZE) {
+		rte_errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	max_ent = rte_align32pow2(params->entries);
+	nb_buckets = max_ent / RTE_K32V64_KEYS_PER_BUCKET;
+	mem_size = sizeof(struct rte_k32v64_hash_table) +
+		sizeof(struct rte_k32v64_hash_bucket) * nb_buckets;
+
+	mp = rte_mempool_create(hash_name, max_ent,
+		sizeof(struct rte_k32v64_ext_ent), 0, 0, NULL, NULL, NULL, NULL,
+		params->socket_id, 0);
+
+	if (mp == NULL)
+		return NULL;
+
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, k32v64_hash_list, next) {
+		ht = (struct rte_k32v64_hash_table *) te->data;
+		if (strncmp(params->name, ht->name,
+				RTE_K32V64_HASH_NAMESIZE) == 0)
+			break;
+	}
+	ht = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		rte_mempool_free(mp);
+		goto exit;
+	}
+
+	te = rte_zmalloc("K32V64_HASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
+		rte_mempool_free(mp);
+		goto exit;
+	}
+
+	ht = rte_zmalloc_socket(hash_name, mem_size,
+		RTE_CACHE_LINE_SIZE, params->socket_id);
+	if (ht == NULL) {
+		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
+		rte_free(te);
+		rte_mempool_free(mp);
+		goto exit;
+	}
+
+	memcpy(ht->name, hash_name, sizeof(ht->name));
+	ht->max_ent = max_ent;
+	ht->bucket_msk = nb_buckets - 1;
+	ht->ext_ent_pool = mp;
+	ht->lookup = get_lookup_bulk_fn();
+
+	te->data = (void *)ht;
+	TAILQ_INSERT_TAIL(k32v64_hash_list, te, next);
+
+exit:
+	rte_mcfg_tailq_write_unlock();
+
+	return ht;
+}
+
+void
+rte_k32v64_hash_free(struct rte_k32v64_hash_table *ht)
+{
+	struct rte_tailq_entry *te;
+	struct rte_k32v64_hash_list *k32v64_hash_list;
+
+	if (ht == NULL)
+		return;
+
+	k32v64_hash_list = RTE_TAILQ_CAST(rte_k32v64_hash_tailq.head,
+				rte_k32v64_hash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* find out tailq entry */
+	TAILQ_FOREACH(te, k32v64_hash_list, next) {
+		if (te->data == (void *) ht)
+			break;
+	}
+
+
+	if (te == NULL) {
+		rte_mcfg_tailq_write_unlock();
+		return;
+	}
+
+	TAILQ_REMOVE(k32v64_hash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	rte_mempool_free(ht->ext_ent_pool);
+	rte_free(ht);
+	rte_free(te);
+}
diff --git a/lib/librte_hash/rte_k32v64_hash.h b/lib/librte_hash/rte_k32v64_hash.h
new file mode 100644
index 0000000..b2c52e9
--- /dev/null
+++ b/lib/librte_hash/rte_k32v64_hash.h
@@ -0,0 +1,211 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_K32V64_HASH_H_
+#define _RTE_K32V64_HASH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_compat.h>
+#include <rte_atomic.h>
+#include <rte_mempool.h>
+
+#define RTE_K32V64_HASH_NAMESIZE		32
+#define RTE_K32V64_KEYS_PER_BUCKET		4
+#define RTE_K32V64_WRITE_IN_PROGRESS		1
+
+struct rte_k32v64_hash_params {
+	const char *name;
+	uint32_t entries;
+	int socket_id;
+};
+
+struct rte_k32v64_ext_ent {
+	SLIST_ENTRY(rte_k32v64_ext_ent) next;
+	uint32_t	key;
+	uint64_t	val;
+};
+
+struct rte_k32v64_hash_bucket {
+	uint32_t	key[RTE_K32V64_KEYS_PER_BUCKET];
+	uint64_t	val[RTE_K32V64_KEYS_PER_BUCKET];
+	uint8_t		key_mask;
+	rte_atomic32_t	cnt;
+	SLIST_HEAD(rte_k32v64_list_head, rte_k32v64_ext_ent) head;
+} __rte_cache_aligned;
+
+struct rte_k32v64_hash_table;
+
+typedef int (*rte_k32v64_hash_bulk_lookup_t)
+(struct rte_k32v64_hash_table *table, uint32_t *keys, uint32_t *hashes,
+	uint64_t *values, unsigned int n);
+
+struct rte_k32v64_hash_table {
+	char name[RTE_K32V64_HASH_NAMESIZE];	/**< Name of the hash. */
+	uint32_t	nb_ent;		/**< Number of entities in the table*/
+	uint32_t	nb_ext_ent;	/**< Number of extended entities */
+	uint32_t	max_ent;	/**< Maximum number of entities */
+	uint32_t	bucket_msk;
+	struct rte_mempool	*ext_ent_pool;
+	rte_k32v64_hash_bulk_lookup_t	lookup;
+	__extension__ struct rte_k32v64_hash_bucket	t[0];
+};
+
+typedef int (*rte_k32v64_cmp_fn_t)
+(struct rte_k32v64_hash_bucket *bucket, uint32_t key, uint64_t *val);
+
+static inline int
+__k32v64_cmp_keys(struct rte_k32v64_hash_bucket *bucket, uint32_t key,
+	uint64_t *val)
+{
+	int i;
+
+	for (i = 0; i < RTE_K32V64_KEYS_PER_BUCKET; i++) {
+		if ((key == bucket->key[i]) &&
+				(bucket->key_mask & (1 << i))) {
+			*val = bucket->val[i];
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static inline int
+__k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value, rte_k32v64_cmp_fn_t cmp_f)
+{
+	uint64_t	val = 0;
+	struct rte_k32v64_ext_ent *ent;
+	int32_t	cnt;
+	int found = 0;
+	uint32_t bucket = hash & table->bucket_msk;
+
+	do {
+		do
+			cnt = rte_atomic32_read(&table->t[bucket].cnt);
+		while (unlikely(cnt & RTE_K32V64_WRITE_IN_PROGRESS));
+
+		found = cmp_f(&table->t[bucket], key, &val);
+		if (unlikely((found == 0) &&
+				(!SLIST_EMPTY(&table->t[bucket].head)))) {
+			SLIST_FOREACH(ent, &table->t[bucket].head, next) {
+				if (ent->key == key) {
+					val = ent->val;
+					found = 1;
+					break;
+				}
+			}
+		}
+
+	} while (unlikely(cnt != rte_atomic32_read(&table->t[bucket].cnt)));
+
+	if (found == 1) {
+		*value = val;
+		return 0;
+	} else
+		return -ENOENT;
+}
+
+static inline int
+rte_k32v64_hash_lookup(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t *value)
+{
+	return __k32v64_hash_lookup(table, key, hash, value, __k32v64_cmp_keys);
+}
+
+static inline int
+rte_k32v64_hash_bulk_lookup(struct rte_k32v64_hash_table *table,
+	uint32_t *keys, uint32_t *hashes, uint64_t *values, unsigned int n)
+{
+	return table->lookup(table, keys, hashes, values, n);
+}
+
+/**
+ * Add a key to an existing hash table with hash value.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param value
+ *   Value to associate with key.
+ * @param hash
+ *   Hash value associated with key.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+__rte_experimental
+int
+rte_k32v64_hash_add(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash, uint64_t value);
+
+/**
+ * Remove a key with a given hash value from an existing hash table.
+ * This operation is not multi-thread
+ * safe and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @param hash
+ *   hash value associated with key.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+__rte_experimental
+int
+rte_k32v64_hash_delete(struct rte_k32v64_hash_table *table, uint32_t key,
+	uint32_t hash);
+
+
+/**
+ * Performs a lookup for an existing hash table, and returns a pointer to
+ * the table if found.
+ *
+ * @param name
+ *   Name of the hash table to find
+ *
+ * @return
+ *   pointer to hash table structure or NULL on error with rte_errno
+ *   set appropriately.
+ */
+__rte_experimental
+struct rte_k32v64_hash_table *
+rte_k32v64_hash_find_existing(const char *name);
+
+/**
+ * Create a new hash table for use with four byte keys.
+ *
+ * @param params
+ *   Parameters used in creation of hash table.
+ *
+ * @return
+ *   Pointer to hash table structure that is used in future hash table
+ *   operations, or NULL on error with rte_errno set appropriately.
+ */
+__rte_experimental
+struct rte_k32v64_hash_table *
+rte_k32v64_hash_create(const struct rte_k32v64_hash_params *params);
+
+/**
+ * Free all memory used by a hash table.
+ *
+ * @param table
+ *   Hash table to deallocate.
+ */
+__rte_experimental
+void
+rte_k32v64_hash_free(struct rte_k32v64_hash_table *table);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_K32V64_HASH_H_ */
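
For reference, a minimal usage sketch of the API added by this patch
(illustrative only, not part of the patch; error handling is trimmed, and
rte_hash_crc_4byte() is just one possible hash since the table is hash
function agnostic):

#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_hash_crc.h>
#include <rte_k32v64_hash.h>

static int
k32v64_example(void)
{
	struct rte_k32v64_hash_params params = {
		.name = "example",
		.entries = 1024,
		.socket_id = SOCKET_ID_ANY,
	};
	struct rte_k32v64_hash_table *ht;
	uint32_t key = 42;
	uint32_t hash;
	uint64_t value;
	int ret;

	ht = rte_k32v64_hash_create(&params);
	if (ht == NULL)
		return -rte_errno;

	/* the caller provides the precalculated hash for every operation */
	hash = rte_hash_crc_4byte(key, 0);

	ret = rte_k32v64_hash_add(ht, key, hash, 0xdeadbeefULL);
	if (ret == 0)
		ret = rte_k32v64_hash_lookup(ht, key, hash, &value);
	if (ret == 0)
		ret = rte_k32v64_hash_delete(ht, key, hash);

	rte_k32v64_hash_free(ht);
	return ret;
}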