diff mbox series

[v1,3/5] hash: add extendable bucket feature

Message ID 1536253745-133104-4-git-send-email-yipeng1.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series hash: add extendable bucket and partial-key hashing | expand

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Wang, Yipeng1 Sept. 6, 2018, 5:09 p.m. UTC
In use cases where the hash table capacity needs to be guaranteed,
the extendable bucket feature can be used to store extra
keys in linked lists when a conflict happens. This is a similar
concept to the extendable bucket hash table in the packet
framework.

This commit adds the extendable bucket feature. Users can turn
it on or off through the extra flag field at table
creation time.

The extendable bucket table is composed of buckets that can be
linked as lists to the current main table. When the extendable
bucket feature is enabled, table utilization can always achieve 100%.
Although keys ending up in the ext buckets may have longer
lookup times, they should be rare thanks to the cuckoo
algorithm.

Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>
---
 lib/librte_hash/rte_cuckoo_hash.c | 331 +++++++++++++++++++++++++++++++++-----
 lib/librte_hash/rte_cuckoo_hash.h |   5 +
 lib/librte_hash/rte_hash.h        |   3 +
 3 files changed, 298 insertions(+), 41 deletions(-)
diff mbox series

Patch

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index f7b86c8..ff380bb 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -31,6 +31,10 @@ 
 #include "rte_hash.h"
 #include "rte_cuckoo_hash.h"
 
+#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
+	for (CURRENT_BKT = START_BUCKET;                                      \
+		CURRENT_BKT != NULL;                                          \
+		CURRENT_BKT = CURRENT_BKT->next)
 
 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
 
@@ -63,6 +67,16 @@  rte_hash_find_existing(const char *name)
 	return h;
 }
 
+static inline struct rte_hash_bucket *
+rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
+{
+	while (1) {
+		if (lst_bkt->next == NULL)
+			return lst_bkt;
+		lst_bkt = lst_bkt->next;
+	}
+}
+
 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
 {
 	h->cmp_jump_table_idx = KEY_CUSTOM;
@@ -85,13 +99,17 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	struct rte_tailq_entry *te = NULL;
 	struct rte_hash_list *hash_list;
 	struct rte_ring *r = NULL;
+	struct rte_ring *r_ext = NULL;
 	char hash_name[RTE_HASH_NAMESIZE];
 	void *k = NULL;
 	void *buckets = NULL;
+	void *buckets_ext = NULL;
 	char ring_name[RTE_RING_NAMESIZE];
+	char ext_ring_name[RTE_RING_NAMESIZE];
 	unsigned num_key_slots;
 	unsigned i;
 	unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
+	unsigned int ext_table_support = 0;
 	unsigned int readwrite_concur_support = 0;
 
 	rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
@@ -124,6 +142,9 @@  rte_hash_create(const struct rte_hash_parameters *params)
 		multi_writer_support = 1;
 	}
 
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
+		ext_table_support = 1;
+
 	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
 	if (multi_writer_support)
 		/*
@@ -145,6 +166,24 @@  rte_hash_create(const struct rte_hash_parameters *params)
 		goto err;
 	}
 
+	const uint32_t num_buckets = rte_align32pow2(params->entries) /
+						RTE_HASH_BUCKET_ENTRIES;
+
+	snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
+								params->name);
+	/* Create ring for extendable buckets. */
+	if (ext_table_support) {
+		r_ext = rte_ring_create(ext_ring_name,
+				rte_align32pow2(num_buckets + 1),
+				params->socket_id, 0);
+
+		if (r_ext == NULL) {
+			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+								"failed\n");
+			goto err;
+		}
+	}
+
 	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
 
 	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
@@ -177,18 +216,34 @@  rte_hash_create(const struct rte_hash_parameters *params)
 		goto err_unlock;
 	}
 
-	const uint32_t num_buckets = rte_align32pow2(params->entries)
-					/ RTE_HASH_BUCKET_ENTRIES;
-
 	buckets = rte_zmalloc_socket(NULL,
 				num_buckets * sizeof(struct rte_hash_bucket),
 				RTE_CACHE_LINE_SIZE, params->socket_id);
 
 	if (buckets == NULL) {
-		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
 		goto err_unlock;
 	}
 
+	/* Allocate same number of extendable buckets */
+	if (ext_table_support) {
+		buckets_ext = rte_zmalloc_socket(NULL,
+				num_buckets * sizeof(struct rte_hash_bucket),
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+		if (buckets_ext == NULL) {
+			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+							"failed\n");
+			goto err_unlock;
+		}
+		/* Populate ext bkt ring. We reserve 0, similarly to the
+		 * key-data slot, in case in the future we want to use
+		 * a bucket index for the linked list, where 0 would mean
+		 * NULL for the next bucket
+		 */
+		for (i = 1; i <= num_buckets; i++)
+			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
+	}
+
 	const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
 	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
 
@@ -262,6 +317,8 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	h->num_buckets = num_buckets;
 	h->bucket_bitmask = h->num_buckets - 1;
 	h->buckets = buckets;
+	h->buckets_ext = buckets_ext;
+	h->free_ext_bkts = r_ext;
 	h->hash_func = (params->hash_func == NULL) ?
 		default_hash_func : params->hash_func;
 	h->key_store = k;
@@ -269,6 +326,7 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	h->hw_trans_mem_support = hw_trans_mem_support;
 	h->multi_writer_support = multi_writer_support;
 	h->readwrite_concur_support = readwrite_concur_support;
+	h->ext_table_support = ext_table_support;
 
 #if defined(RTE_ARCH_X86)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -304,9 +362,11 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 err:
 	rte_ring_free(r);
+	rte_ring_free(r_ext);
 	rte_free(te);
 	rte_free(h);
 	rte_free(buckets);
+	rte_free(buckets_ext);
 	rte_free(k);
 	return NULL;
 }
@@ -344,6 +404,7 @@  rte_hash_free(struct rte_hash *h)
 		rte_free(h->readwrite_lock);
 	}
 	rte_ring_free(h->free_slots);
+	rte_ring_free(h->free_ext_bkts);
 	rte_free(h->key_store);
 	rte_free(h->buckets);
 	rte_free(h);
@@ -448,6 +509,14 @@  rte_hash_reset(struct rte_hash *h)
 	while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
 		rte_pause();
 
+	/* clear free extendable bucket ring and memory */
+	if (h->ext_table_support) {
+		memset(h->buckets_ext, 0, h->num_buckets *
+						sizeof(struct rte_hash_bucket));
+		while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
+			rte_pause();
+	}
+
 	/* Repopulate the free slots ring. Entry zero is reserved for key misses */
 	if (h->multi_writer_support)
 		tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
@@ -458,6 +527,12 @@  rte_hash_reset(struct rte_hash *h)
 	for (i = 1; i < tot_ring_cnt + 1; i++)
 		rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
 
+	/* Repopulate the free ext bkt ring. */
+	if (h->ext_table_support)
+		for (i = 1; i < h->num_buckets + 1; i++)
+			rte_ring_sp_enqueue(h->free_ext_bkts,
+						(void *)((uintptr_t) i));
+
 	if (h->multi_writer_support) {
 		/* Reset local caches per lcore */
 		for (i = 0; i < RTE_MAX_LCORE; i++)
@@ -524,24 +599,27 @@  rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
 		int32_t *ret_val)
 {
 	unsigned int i;
-	struct rte_hash_bucket *cur_bkt = prim_bkt;
+	struct rte_hash_bucket *cur_bkt;
 	int32_t ret;
 
 	__hash_rw_writer_lock(h);
 	/* Check if key was inserted after last check but before this
 	 * protected region in case of inserting duplicated keys.
 	 */
-	ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		*ret_val = ret;
 		return 1;
 	}
-	ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
-	if (ret != -1) {
-		__hash_rw_writer_unlock(h);
-		*ret_val = ret;
-		return 1;
+
+	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		if (ret != -1) {
+			__hash_rw_writer_unlock(h);
+			*ret_val = ret;
+			return 1;
+		}
 	}
 
 	/* Insert new entry if there is room in the primary
@@ -580,7 +658,7 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 			int32_t *ret_val)
 {
 	uint32_t prev_alt_bkt_idx;
-	struct rte_hash_bucket *cur_bkt = bkt;
+	struct rte_hash_bucket *cur_bkt;
 	struct queue_node *prev_node, *curr_node = leaf;
 	struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
 	uint32_t prev_slot, curr_slot = leaf_slot;
@@ -597,18 +675,20 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 	/* Check if key was inserted after last check but before this
 	 * protected region.
 	 */
-	ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, bkt, sig, alt_hash);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		*ret_val = ret;
 		return 1;
 	}
 
-	ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig);
-	if (ret != -1) {
-		__hash_rw_writer_unlock(h);
-		*ret_val = ret;
-		return 1;
+	FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
+		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		if (ret != -1) {
+			__hash_rw_writer_unlock(h);
+			*ret_val = ret;
+			return 1;
+		}
 	}
 
 	while (likely(curr_node->prev != NULL)) {
@@ -711,15 +791,18 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 {
 	hash_sig_t alt_hash;
 	uint32_t prim_bucket_idx, sec_bucket_idx;
-	struct rte_hash_bucket *prim_bkt, *sec_bkt;
+	struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
 	struct rte_hash_key *new_k, *keys = h->key_store;
 	void *slot_id = NULL;
-	uint32_t new_idx;
+	void *ext_bkt_id = NULL;
+	uint32_t new_idx, bkt_id;
 	int ret;
 	unsigned n_slots;
 	unsigned lcore_id;
+	unsigned int i;
 	struct lcore_cache *cached_free_slots = NULL;
 	int32_t ret_val;
+	struct rte_hash_bucket *last;
 
 	prim_bucket_idx = sig & h->bucket_bitmask;
 	prim_bkt = &h->buckets[prim_bucket_idx];
@@ -739,10 +822,12 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	}
 
 	/* Check if key is already inserted in secondary location */
-	ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
-	if (ret != -1) {
-		__hash_rw_writer_unlock(h);
-		return ret;
+	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		if (ret != -1) {
+			__hash_rw_writer_unlock(h);
+			return ret;
+		}
 	}
 	__hash_rw_writer_unlock(h);
 
@@ -808,10 +893,72 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	else if (ret == 1) {
 		enqueue_slot_back(h, cached_free_slots, slot_id);
 		return ret_val;
-	} else {
+	}
+
+	/* if ext table not enabled, we failed the insertion */
+	if (!h->ext_table_support) {
 		enqueue_slot_back(h, cached_free_slots, slot_id);
 		return ret;
 	}
+
+	/* Now we need to go through the extendable table. The write lock
+	 * is needed to protect all extendable table operations.
+	 */
+	__hash_rw_writer_lock(h);
+	/* We check for duplicates again since they could be added before the lock */
+	/* Check if key is already inserted in primary location */
+	ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+	if (ret != -1) {
+		enqueue_slot_back(h, cached_free_slots, slot_id);
+		goto failure;
+	}
+
+	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		if (ret != -1) {
+			enqueue_slot_back(h, cached_free_slots, slot_id);
+			goto failure;
+		}
+	}
+
+	/* search extendable table to find an empty entry */
+	struct rte_hash_bucket *next_bkt = sec_bkt->next;
+	FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			/* Check if slot is available */
+			if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
+				cur_bkt->sig_current[i] = alt_hash;
+				cur_bkt->sig_alt[i] = sig;
+				cur_bkt->key_idx[i] = new_idx;
+				__hash_rw_writer_unlock(h);
+				return new_idx - 1;
+			}
+		}
+	}
+
+	/* failed to get an empty entry from extendable table. Link a new
+	 * extendable bucket. We first get a free bucket from the ring.
+	 */
+	if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
+		ret = -ENOSPC;
+		goto failure;
+	}
+
+	bkt_id = (uint32_t)((uintptr_t) ext_bkt_id) - 1;
+	/* Use the first location of the new bucket */
+	(h->buckets_ext[bkt_id]).sig_current[0] = alt_hash;
+	(h->buckets_ext[bkt_id]).sig_alt[0] = sig;
+	(h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
+	/* Link the new bucket to sec bucket linked list */
+	last = rte_hash_get_last_bkt(sec_bkt);
+	last->next = &h->buckets_ext[bkt_id];
+	__hash_rw_writer_unlock(h);
+	return new_idx - 1;
+
+failure:
+	__hash_rw_writer_unlock(h);
+	return ret;
+
 }
 
 int32_t
@@ -890,7 +1037,7 @@  __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 {
 	uint32_t bucket_idx;
 	hash_sig_t alt_hash;
-	struct rte_hash_bucket *bkt;
+	struct rte_hash_bucket *bkt, *cur_bkt;
 	int ret;
 
 	bucket_idx = sig & h->bucket_bitmask;
@@ -904,16 +1051,19 @@  __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 		__hash_rw_reader_unlock(h);
 		return ret;
 	}
+
 	/* Calculate secondary hash */
 	alt_hash = rte_hash_secondary_hash(sig);
 	bucket_idx = alt_hash & h->bucket_bitmask;
 	bkt = &h->buckets[bucket_idx];
 
 	/* Check if key is in secondary location */
-	ret = search_one_bucket(h, key, alt_hash, data, bkt);
-	if (ret != -1) {
-		__hash_rw_reader_unlock(h);
-		return ret;
+	FOR_EACH_BUCKET(cur_bkt, bkt) {
+		ret = search_one_bucket(h, key, alt_hash, data, cur_bkt);
+		if (ret != -1) {
+			__hash_rw_reader_unlock(h);
+			return ret;
+		}
 	}
 	__hash_rw_reader_unlock(h);
 	return -ENOENT;
@@ -1015,15 +1165,17 @@  __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 {
 	uint32_t bucket_idx;
 	hash_sig_t alt_hash;
-	struct rte_hash_bucket *bkt;
-	int32_t ret;
+	struct rte_hash_bucket *prim_bkt, *sec_bkt;
+	struct rte_hash_bucket *cur_bkt, *prev_bkt, *next_bkt;
+	int32_t ret, i;
+	struct rte_hash_bucket *tobe_removed_bkt = NULL;
 
 	bucket_idx = sig & h->bucket_bitmask;
-	bkt = &h->buckets[bucket_idx];
+	prim_bkt = &h->buckets[bucket_idx];
 
 	__hash_rw_writer_lock(h);
 	/* look for key in primary bucket */
-	ret = search_and_remove(h, key, bkt, sig);
+	ret = search_and_remove(h, key, prim_bkt, sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		return ret;
@@ -1032,17 +1184,53 @@  __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 	/* Calculate secondary hash */
 	alt_hash = rte_hash_secondary_hash(sig);
 	bucket_idx = alt_hash & h->bucket_bitmask;
-	bkt = &h->buckets[bucket_idx];
+	sec_bkt = &h->buckets[bucket_idx];
 
 	/* look for key in secondary bucket */
-	ret = search_and_remove(h, key, bkt, alt_hash);
+	ret = search_and_remove(h, key, sec_bkt, alt_hash);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		return ret;
 	}
 
+	/* Not in main table, we need to search ext table */
+	if (h->ext_table_support) {
+		next_bkt = sec_bkt->next;
+		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+			ret = search_and_remove(h, key, cur_bkt, alt_hash);
+			if (ret != -1)
+				goto return_bkt;
+		}
+	}
+
 	__hash_rw_writer_unlock(h);
 	return -ENOENT;
+
+/* Search extendable buckets to see if any empty bucket needs to be recycled */
+return_bkt:
+	prev_bkt = sec_bkt;
+
+	for (cur_bkt = sec_bkt->next; cur_bkt != NULL;
+			prev_bkt = cur_bkt, cur_bkt = cur_bkt->next) {
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			if (cur_bkt->key_idx[i] != EMPTY_SLOT)
+				break;
+		}
+		if (i == RTE_HASH_BUCKET_ENTRIES) {
+			prev_bkt->next = cur_bkt->next;
+			cur_bkt->next = NULL;
+			tobe_removed_bkt = cur_bkt;
+			break;
+		}
+	}
+
+	__hash_rw_writer_unlock(h);
+
+	if (tobe_removed_bkt) {
+		uint32_t index = tobe_removed_bkt - h->buckets_ext + 1;
+		rte_ring_mp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
+	}
+	return ret;
 }
 
 int32_t
@@ -1143,6 +1331,7 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 {
 	uint64_t hits = 0;
 	int32_t i;
+	int32_t ret;
 	uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
 	uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
 	const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
@@ -1266,6 +1455,35 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		continue;
 	}
 
+	/* all found, do not need to go through ext bkt */
+	if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
+		if (hit_mask != NULL)
+			*hit_mask = hits;
+		__hash_rw_reader_unlock(h);
+		return;
+	}
+
+	/* need to check ext buckets for match */
+	for (i = 0; i < num_keys; i++) {
+		if ((hits & (1ULL << i)) != 0)
+			continue;
+		struct rte_hash_bucket *cur_bkt;
+		struct rte_hash_bucket *next_bkt = secondary_bkt[i]->next;
+		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+			if (data != NULL)
+				ret = search_one_bucket(h, keys[i],
+						sec_hash[i], &data[i], cur_bkt);
+			else
+				ret = search_one_bucket(h, keys[i],
+						sec_hash[i], NULL, cur_bkt);
+			if (ret != -1) {
+				positions[i] = ret;
+				hits |= 1ULL << i;
+				break;
+			}
+		}
+	}
+
 	__hash_rw_reader_unlock(h);
 
 	if (hit_mask != NULL)
@@ -1308,10 +1526,13 @@  rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 
 	RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
 
-	const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
+	const uint32_t total_entries_main = h->num_buckets *
+							RTE_HASH_BUCKET_ENTRIES;
+	const uint32_t total_entries = total_entries_main << 1;
+
 	/* Out of bounds */
-	if (*next >= total_entries)
-		return -ENOENT;
+	if (*next >= total_entries_main)
+		goto extend_table;
 
 	/* Calculate bucket and index of current iterator */
 	bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
@@ -1321,8 +1542,8 @@  rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 	while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
 		(*next)++;
 		/* End of table */
-		if (*next == total_entries)
-			return -ENOENT;
+		if (*next == total_entries_main)
+			goto extend_table;
 		bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
 		idx = *next % RTE_HASH_BUCKET_ENTRIES;
 	}
@@ -1341,4 +1562,32 @@  rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 	(*next)++;
 
 	return position - 1;
+
+extend_table:
+	/* Out of bounds */
+	if (*next >= total_entries || !h->ext_table_support)
+		return -ENOENT;
+
+	bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
+	idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
+
+	while (h->buckets_ext[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
+		(*next)++;
+		if (*next == total_entries)
+			return -ENOENT;
+		bucket_idx = (*next - total_entries_main) /
+						RTE_HASH_BUCKET_ENTRIES;
+		idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
+	}
+	/* Get position of entry in key table */
+	position = h->buckets_ext[bucket_idx].key_idx[idx];
+	next_key = (struct rte_hash_key *) ((char *)h->key_store +
+				position * h->key_entry_size);
+	/* Return key and data */
+	*key = next_key->key;
+	*data = next_key->pdata;
+
+	/* Increment iterator */
+	(*next)++;
+	return position - 1;
 }
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index b43f467..f190b04 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -144,6 +144,8 @@  struct rte_hash_bucket {
 	hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
 
 	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+
+	void *next;
 } __rte_cache_aligned;
 
 /** A hash table structure. */
@@ -185,7 +187,10 @@  struct rte_hash {
 	/**< Table with buckets storing all the	hash values and key indexes
 	 * to the key table.
 	 */
+	uint8_t ext_table_support;     /**< Enable ext table */
 	rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
+	struct rte_hash_bucket *buckets_ext; /**< extra bucket array */
+	struct rte_ring *free_ext_bkts; /**< ring of indexes of free buckets */
 } __rte_cache_aligned;
 
 struct queue_node {
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 9e7d931..2747522 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -37,6 +37,9 @@  extern "C" {
 /** Flag to support reader writer concurrency */
 #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
 
+/** Flag to indicate the extended table should be used */
+#define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08
+
 /** Signature of key that is stored internally. */
 typedef uint32_t hash_sig_t;