[v2,7/7] hash: use partial-key hashing

Message ID 1537550255-252066-8-git-send-email-yipeng1.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series hash: add extendable bucket and partial key hashing |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Wang, Yipeng1 Sept. 21, 2018, 5:17 p.m. UTC
  This commit changes the hashing mechanism to "partial-key
hashing" to calculate bucket index and signature of key.

This was proposed in Bin Fan, et al.'s paper
"MemC3: Compact and Concurrent MemCache with Dumber Caching
and Smarter Hashing". Basically the idea is to use "xor" to
derive the alternative bucket from the current bucket index and
signature.

With "partial-key hashing", it reduces the bucket memory
requirement from two cache lines to one cache line, which
improves the memory efficiency and thus the lookup speed.

Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>
---
 lib/librte_hash/rte_cuckoo_hash.c | 228 ++++++++++++++++++--------------------
 lib/librte_hash/rte_cuckoo_hash.h |   6 +-
 lib/librte_hash/rte_hash.h        |   5 +-
 3 files changed, 114 insertions(+), 125 deletions(-)
  

Comments

Honnappa Nagarahalli Sept. 27, 2018, 4:24 a.m. UTC | #1
> -----Original Message-----
> From: Yipeng Wang <yipeng1.wang@intel.com>
> Sent: Friday, September 21, 2018 12:18 PM
> To: bruce.richardson@intel.com
> Cc: dev@dpdk.org; yipeng1.wang@intel.com; michel@digirati.com.br;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Subject: [PATCH v2 7/7] hash: use partial-key hashing
>
> This commit changes the hashing mechanism to "partial-key hashing" to
> calculate bucket index and signature of key.
>
> This is  proposed in Bin Fan, et al's paper
> "MemC3: Compact and Concurrent MemCache with Dumber Caching and
> Smarter Hashing". Bascially the idea is to use "xor" to derive alternative
> bucket from current bucket index and signature.
>
> With "partial-key hashing", it reduces the bucket memory requirement from
> two cache lines to one cache line, which improves the memory efficiency and
> thus the lookup speed.
>
> Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>
> ---
>  lib/librte_hash/rte_cuckoo_hash.c | 228 ++++++++++++++++++-------------------
> -
>  lib/librte_hash/rte_cuckoo_hash.h |   6 +-
>  lib/librte_hash/rte_hash.h        |   5 +-
>  3 files changed, 114 insertions(+), 125 deletions(-)
>
> diff --git a/lib/librte_hash/rte_cuckoo_hash.c
> b/lib/librte_hash/rte_cuckoo_hash.c
> index 616900b..5108ff0 100644
> --- a/lib/librte_hash/rte_cuckoo_hash.c
> +++ b/lib/librte_hash/rte_cuckoo_hash.c
> @@ -90,6 +90,27 @@ rte_hash_cmp_eq(const void *key1, const void *key2,
> const struct rte_hash *h)
>  return cmp_jump_table[h->cmp_jump_table_idx](key1, key2,
> h->key_len);  }
>
> +static inline void
> +get_buckets_index(const struct rte_hash *h, const hash_sig_t hash,
> +uint32_t *prim_bkt, uint32_t *sec_bkt, uint16_t *sig) {
> +/*
> + * We use higher 16 bits of hash as the signature value stored in table.
> + * We use the lower bits for the primary bucket
> + * location. Then we XOR primary bucket location and the signature
> + * to get the secondary bucket location. This is same as
> + * proposed in Bin Fan, et al's paper
> + * "MemC3: Compact and Concurrent MemCache with Dumber
> Caching and
> + * Smarter Hashing". The benefit to use
> + * XOR is that one could derive the alternative bucket location
> + * by only using the current bucket location and the signature.
> + */
> +*sig = hash >> 16;
> +
> +*prim_bkt = hash & h->bucket_bitmask;
> +*sec_bkt =  (*prim_bkt ^ *sig) & h->bucket_bitmask; }
> +
IMO, this function can be split into 2 - one for primary bucket index and another for secondary bucket index. The secondary bucket index calculation function can be used in functions ' rte_hash_cuckoo_move_insert_mw' and ' rte_hash_cuckoo_make_space_mw'.

>  struct rte_hash *
>  rte_hash_create(const struct rte_hash_parameters *params)  { @@ -327,9
> +348,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
>  h->ext_table_support = ext_table_support;
>
>  #if defined(RTE_ARCH_X86)
> -if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
> -h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
> -else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
> +if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
>  h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
>  else
>  #endif
> @@ -416,18 +435,6 @@ rte_hash_hash(const struct rte_hash *h, const void
> *key)
>  return h->hash_func(key, h->key_len, h->hash_func_init_val);  }
>
> -/* Calc the secondary hash value from the primary hash value of a given key
> */ -static inline hash_sig_t -rte_hash_secondary_hash(const hash_sig_t
> primary_hash) -{
> -static const unsigned all_bits_shift = 12;
> -static const unsigned alt_bits_xor = 0x5bd1e995;
> -
> -uint32_t tag = primary_hash >> all_bits_shift;
> -
> -return primary_hash ^ ((tag + 1) * alt_bits_xor);
> -}
> -
>  int32_t
>  rte_hash_count(const struct rte_hash *h)  { @@ -558,14 +565,13 @@
> enqueue_slot_back(const struct rte_hash *h,
>  /* Search a key from bucket and update its data */  static inline int32_t
> search_and_update(const struct rte_hash *h, void *data, const void *key,
> -struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
> +struct rte_hash_bucket *bkt, uint16_t sig)
>  {
>  int i;
>  struct rte_hash_key *k, *keys = h->key_store;
>
>  for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> -if (bkt->sig_current[i] == sig &&
> -bkt->sig_alt[i] == alt_hash) {
> +if (bkt->sig_current[i] == sig) {
>  k = (struct rte_hash_key *) ((char *)keys +
>  bkt->key_idx[i] * h->key_entry_size);
>  if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -
> 592,7 +598,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>  struct rte_hash_bucket *prim_bkt,
>  struct rte_hash_bucket *sec_bkt,
>  const struct rte_hash_key *key, void *data,
> -hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
> +uint16_t sig, uint32_t new_idx,
>  int32_t *ret_val)
>  {
>  unsigned int i;
> @@ -603,7 +609,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>  /* Check if key was inserted after last check but before this
>   * protected region in case of inserting duplicated keys.
>   */
> -ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
> +ret = search_and_update(h, data, key, prim_bkt, sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  *ret_val = ret;
> @@ -611,7 +617,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>  }
>
>  FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
> +ret = search_and_update(h, data, key, cur_bkt, sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  *ret_val = ret;
> @@ -626,7 +632,6 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>  /* Check if slot is available */
>  if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
>  prim_bkt->sig_current[i] = sig;
> -prim_bkt->sig_alt[i] = alt_hash;
>  prim_bkt->key_idx[i] = new_idx;
>  break;
>  }
> @@ -651,7 +656,7 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>  struct rte_hash_bucket *alt_bkt,
>  const struct rte_hash_key *key, void *data,
>  struct queue_node *leaf, uint32_t leaf_slot,
> -hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
> +uint16_t sig, uint32_t new_idx,
>  int32_t *ret_val)
>  {
>  uint32_t prev_alt_bkt_idx;
> @@ -672,7 +677,7 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>  /* Check if key was inserted after last check but before this
>   * protected region.
>   */
> -ret = search_and_update(h, data, key, bkt, sig, alt_hash);
> +ret = search_and_update(h, data, key, bkt, sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  *ret_val = ret;
> @@ -680,7 +685,7 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>  }
>
>  FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
> -ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
> +ret = search_and_update(h, data, key, cur_bkt, sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  *ret_val = ret;
> @@ -693,8 +698,9 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>  prev_bkt = prev_node->bkt;
>  prev_slot = curr_node->prev_slot;
>
> -prev_alt_bkt_idx =
> -prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
> +prev_alt_bkt_idx = (prev_node->cur_bkt_idx ^
> +prev_bkt->sig_current[prev_slot]) &
> +h->bucket_bitmask;
>
>  if (unlikely(&h->buckets[prev_alt_bkt_idx]
>  != curr_bkt)) {
> @@ -708,10 +714,8 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>   * Cuckoo insert to move elements back to its
>   * primary bucket if available
>   */
> -curr_bkt->sig_alt[curr_slot] =
> - prev_bkt->sig_current[prev_slot];
>  curr_bkt->sig_current[curr_slot] =
> -prev_bkt->sig_alt[prev_slot];
> +prev_bkt->sig_current[prev_slot];
>  curr_bkt->key_idx[curr_slot] =
>  prev_bkt->key_idx[prev_slot];
>
> @@ -721,7 +725,6 @@ rte_hash_cuckoo_move_insert_mw(const struct
> rte_hash *h,
>  }
>
>  curr_bkt->sig_current[curr_slot] = sig;
> -curr_bkt->sig_alt[curr_slot] = alt_hash;
>  curr_bkt->key_idx[curr_slot] = new_idx;
>
>  __hash_rw_writer_unlock(h);
> @@ -739,39 +742,44 @@ rte_hash_cuckoo_make_space_mw(const struct
> rte_hash *h,
>  struct rte_hash_bucket *bkt,
>  struct rte_hash_bucket *sec_bkt,
>  const struct rte_hash_key *key, void *data,
> -hash_sig_t sig, hash_sig_t alt_hash,
> +uint16_t sig, uint32_t bucket_idx,
>  uint32_t new_idx, int32_t *ret_val)
>  {
>  unsigned int i;
>  struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
>  struct queue_node *tail, *head;
>  struct rte_hash_bucket *curr_bkt, *alt_bkt;
> +uint32_t cur_idx, alt_idx;
>
>  tail = queue;
>  head = queue + 1;
>  tail->bkt = bkt;
>  tail->prev = NULL;
>  tail->prev_slot = -1;
> +tail->cur_bkt_idx = bucket_idx;
>
>  /* Cuckoo bfs Search */
>  while (likely(tail != head && head <
>  queue +
> RTE_HASH_BFS_QUEUE_MAX_LEN -
>  RTE_HASH_BUCKET_ENTRIES)) {
>  curr_bkt = tail->bkt;
> +cur_idx = tail->cur_bkt_idx;
>  for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
>  if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
>  int32_t ret =
> rte_hash_cuckoo_move_insert_mw(h,
>  bkt, sec_bkt, key, data,
> -tail, i, sig, alt_hash,
> +tail, i, sig,
>  new_idx, ret_val);
>  if (likely(ret != -1))
>  return ret;
>  }
>
>  /* Enqueue new node and keep prev node info */
> -alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
> -    & h->bucket_bitmask]);
> +alt_idx = (curr_bkt->sig_current[i] ^ cur_idx) &
> +h->bucket_bitmask;
> +alt_bkt = &(h->buckets[alt_idx]);
>  head->bkt = alt_bkt;
> +head->cur_bkt_idx = alt_idx;
>  head->prev = tail;
>  head->prev_slot = i;
>  head++;
> @@ -786,7 +794,7 @@ static inline int32_t
> __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
>  hash_sig_t sig, void *data)
>  {
> -hash_sig_t alt_hash;
> +uint16_t short_sig;
>  uint32_t prim_bucket_idx, sec_bucket_idx;
>  struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
>  struct rte_hash_key *new_k, *keys = h->key_store; @@ -801,18
> +809,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const
> void *key,
>  int32_t ret_val;
>  struct rte_hash_bucket *last;
>
> -prim_bucket_idx = sig & h->bucket_bitmask;
> +get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx,
> +&short_sig);
>  prim_bkt = &h->buckets[prim_bucket_idx];
> -rte_prefetch0(prim_bkt);
> -
> -alt_hash = rte_hash_secondary_hash(sig);
> -sec_bucket_idx = alt_hash & h->bucket_bitmask;
>  sec_bkt = &h->buckets[sec_bucket_idx];
> +rte_prefetch0(prim_bkt);
>  rte_prefetch0(sec_bkt);
>
>  /* Check if key is already inserted in primary location */
>  __hash_rw_writer_lock(h);
> -ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
> +ret = search_and_update(h, data, key, prim_bkt, short_sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  return ret;
> @@ -820,12 +825,13 @@ __rte_hash_add_key_with_hash(const struct
> rte_hash *h, const void *key,
>
>  /* Check if key is already inserted in secondary location */
>  FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
> +ret = search_and_update(h, data, key, cur_bkt, short_sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  return ret;
>  }
>  }
> +
>  __hash_rw_writer_unlock(h);
>
>  /* Did not find a match, so get a new slot for storing the new key */
> @@ -863,7 +869,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>
>  /* Find an empty slot and insert */
>  ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
> -sig, alt_hash, new_idx, &ret_val);
> +short_sig, new_idx, &ret_val);
>  if (ret == 0)
>  return new_idx - 1;
>  else if (ret == 1) {
> @@ -873,7 +879,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>
>  /* Primary bucket full, need to make space for new entry */
>  ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key,
> data,
> -sig, alt_hash, new_idx, &ret_val);
> +short_sig, prim_bucket_idx, new_idx,
> &ret_val);
>  if (ret == 0)
>  return new_idx - 1;
>  else if (ret == 1) {
> @@ -883,7 +889,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>
>  /* Also search secondary bucket to get better occupancy */
>  ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key,
> data,
> -alt_hash, sig, new_idx, &ret_val);
> +short_sig, sec_bucket_idx, new_idx, &ret_val);
>
>  if (ret == 0)
>  return new_idx - 1;
> @@ -903,14 +909,14 @@ __rte_hash_add_key_with_hash(const struct
> rte_hash *h, const void *key,
>   */
>  __hash_rw_writer_lock(h);
>  /* We check for duplicates again since could be inserted before the
> lock */
> -ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
> +ret = search_and_update(h, data, key, prim_bkt, short_sig);
>  if (ret != -1) {
>  enqueue_slot_back(h, cached_free_slots, slot_id);
>  goto failure;
>  }
>
>  FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
> +ret = search_and_update(h, data, key, cur_bkt, short_sig);
>  if (ret != -1) {
>  enqueue_slot_back(h, cached_free_slots, slot_id);
>  goto failure;
> @@ -923,8 +929,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>  for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
>  /* Check if slot is available */
>  if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
> -cur_bkt->sig_current[i] = alt_hash;
> -cur_bkt->sig_alt[i] = sig;
> +cur_bkt->sig_current[i] = short_sig;
>  cur_bkt->key_idx[i] = new_idx;
>  __hash_rw_writer_unlock(h);
>  return new_idx - 1;
> @@ -942,8 +947,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>
>  bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
>  /* Use the first location of the new bucket */
> -(h->buckets_ext[bkt_id]).sig_current[0] = alt_hash;
> -(h->buckets_ext[bkt_id]).sig_alt[0] = sig;
> +(h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
>  (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
>  /* Link the new bucket to sec bucket linked list */
>  last = rte_hash_get_last_bkt(sec_bkt); @@ -1002,7 +1006,7 @@
> rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
>
>  /* Search one bucket to find the match key */  static inline int32_t -
> search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
> +search_one_bucket(const struct rte_hash *h, const void *key, uint16_t
> +sig,
>  void **data, const struct rte_hash_bucket *bkt)  {
>  int i;
> @@ -1031,30 +1035,28 @@ static inline int32_t
> __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
>  hash_sig_t sig, void **data)
>  {
> -uint32_t bucket_idx;
> -hash_sig_t alt_hash;
> +uint32_t prim_bucket_idx, sec_bucket_idx;
>  struct rte_hash_bucket *bkt, *cur_bkt;
>  int ret;
> +uint16_t short_sig;
>
> -bucket_idx = sig & h->bucket_bitmask;
> -bkt = &h->buckets[bucket_idx];
> +get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx,
> &short_sig);
> +bkt = &h->buckets[prim_bucket_idx];
>
>  __hash_rw_reader_lock(h);
>
>  /* Check if key is in primary location */
> -ret = search_one_bucket(h, key, sig, data, bkt);
> +ret = search_one_bucket(h, key, short_sig, data, bkt);
>  if (ret != -1) {
>  __hash_rw_reader_unlock(h);
>  return ret;
>  }
>  /* Calculate secondary hash */
> -alt_hash = rte_hash_secondary_hash(sig);
> -bucket_idx = alt_hash & h->bucket_bitmask;
> -bkt = &h->buckets[bucket_idx];
> +bkt = &h->buckets[sec_bucket_idx];
>
>  /* Check if key is in secondary location */
>  FOR_EACH_BUCKET(cur_bkt, bkt) {
> -ret = search_one_bucket(h, key, alt_hash, data, cur_bkt);
> +ret = search_one_bucket(h, key, short_sig, data, cur_bkt);
>  if (ret != -1) {
>  __hash_rw_reader_unlock(h);
>  return ret;
> @@ -1101,7 +1103,6 @@ remove_entry(const struct rte_hash *h, struct
> rte_hash_bucket *bkt, unsigned i)
>  struct lcore_cache *cached_free_slots;
>
>  bkt->sig_current[i] = NULL_SIGNATURE;
> -bkt->sig_alt[i] = NULL_SIGNATURE;
>  if (h->multi_writer_support) {
>  lcore_id = rte_lcore_id();
>  cached_free_slots = &h->local_free_slots[lcore_id]; @@ -
> 1126,7 +1127,7 @@ remove_entry(const struct rte_hash *h, struct
> rte_hash_bucket *bkt, unsigned i)
>  /* Search one bucket and remove the matched key */  static inline int32_t
> search_and_remove(const struct rte_hash *h, const void *key,
> -struct rte_hash_bucket *bkt, hash_sig_t sig)
> +struct rte_hash_bucket *bkt, uint16_t sig)
>  {
>  struct rte_hash_key *k, *keys = h->key_store;
>  unsigned int i;
> @@ -1158,31 +1159,29 @@ static inline int32_t
> __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
>  hash_sig_t sig)
>  {
> -uint32_t bucket_idx;
> -hash_sig_t alt_hash;
> +uint32_t prim_bucket_idx, sec_bucket_idx;
>  struct rte_hash_bucket *prim_bkt, *sec_bkt;
>  struct rte_hash_bucket *cur_bkt, *prev_bkt, *next_bkt;
>  int32_t ret, i;
>  struct rte_hash_bucket *tobe_removed_bkt = NULL;
> +uint16_t short_sig;
>
> -bucket_idx = sig & h->bucket_bitmask;
> -prim_bkt = &h->buckets[bucket_idx];
> +get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx,
> &short_sig);
> +prim_bkt = &h->buckets[prim_bucket_idx];
>
>  __hash_rw_writer_lock(h);
>  /* look for key in primary bucket */
> -ret = search_and_remove(h, key, prim_bkt, sig);
> +ret = search_and_remove(h, key, prim_bkt, short_sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  return ret;
>  }
>
>  /* Calculate secondary hash */
> -alt_hash = rte_hash_secondary_hash(sig);
> -bucket_idx = alt_hash & h->bucket_bitmask;
> -sec_bkt = &h->buckets[bucket_idx];
> +sec_bkt = &h->buckets[sec_bucket_idx];
>
>  /* look for key in secondary bucket */
> -ret = search_and_remove(h, key, sec_bkt, alt_hash);
> +ret = search_and_remove(h, key, sec_bkt, short_sig);
>  if (ret != -1) {
>  __hash_rw_writer_unlock(h);
>  return ret;
> @@ -1192,7 +1191,7 @@ __rte_hash_del_key_with_hash(const struct
> rte_hash *h, const void *key,
>  if (h->ext_table_support) {
>  next_bkt = sec_bkt->next;
>  FOR_EACH_BUCKET(cur_bkt, next_bkt) {
> -ret = search_and_remove(h, key, cur_bkt, alt_hash);
> +ret = search_and_remove(h, key, cur_bkt, short_sig);
>  if (ret != -1)
>  goto return_bkt;
>  }
> @@ -1265,55 +1264,35 @@ static inline void  compare_signatures(uint32_t
> *prim_hash_matches, uint32_t *sec_hash_matches,
>  const struct rte_hash_bucket *prim_bkt,
>  const struct rte_hash_bucket *sec_bkt,
> -hash_sig_t prim_hash, hash_sig_t sec_hash,
> +uint16_t sig,
>  enum rte_hash_sig_compare_function sig_cmp_fn)  {
>  unsigned int i;
>
> +/* For match mask the first bit of every two bits indicates the match
> +*/
>  switch (sig_cmp_fn) {
> -#ifdef RTE_MACHINE_CPUFLAG_AVX2
> -case RTE_HASH_COMPARE_AVX2:
> -*prim_hash_matches =
> _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
> -_mm256_load_si256(
> -(__m256i const *)prim_bkt-
> >sig_current),
> -_mm256_set1_epi32(prim_hash)));
> -*sec_hash_matches =
> _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
> -_mm256_load_si256(
> -(__m256i const *)sec_bkt-
> >sig_current),
> -_mm256_set1_epi32(sec_hash)));
> -break;
> -#endif
>  #ifdef RTE_MACHINE_CPUFLAG_SSE2
>  case RTE_HASH_COMPARE_SSE:
> -/* Compare the first 4 signatures in the bucket */
> -*prim_hash_matches =
> _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
> +/* Compare all signatures in the bucket */
> +*prim_hash_matches =
> _mm_movemask_epi8(_mm_cmpeq_epi16(
>  _mm_load_si128(
>  (__m128i const *)prim_bkt-
> >sig_current),
> -_mm_set1_epi32(prim_hash)));
> -*prim_hash_matches |=
> (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
> -_mm_load_si128(
> -(__m128i const *)&prim_bkt-
> >sig_current[4]),
> -_mm_set1_epi32(prim_hash)))) << 4;
> -/* Compare the first 4 signatures in the bucket */
> -*sec_hash_matches =
> _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
> +_mm_set1_epi16(sig)));
> +/* Compare all signatures in the bucket */
> +*sec_hash_matches =
> _mm_movemask_epi8(_mm_cmpeq_epi16(
>  _mm_load_si128(
>  (__m128i const *)sec_bkt-
> >sig_current),
> -_mm_set1_epi32(sec_hash)));
> -*sec_hash_matches |=
> (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
> -_mm_load_si128(
> -(__m128i const *)&sec_bkt-
> >sig_current[4]),
> -_mm_set1_epi32(sec_hash)))) << 4;
> +_mm_set1_epi16(sig)));
>  break;
>  #endif
>  default:
>  for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
>  *prim_hash_matches |=
> -((prim_hash == prim_bkt->sig_current[i]) << i);
> +((sig == prim_bkt->sig_current[i]) << (i << 1));
>  *sec_hash_matches |=
> -((sec_hash == sec_bkt->sig_current[i]) << i);
> +((sig == sec_bkt->sig_current[i]) << (i << 1));
>  }
>  }
> -
>  }
>
>  #define PREFETCH_OFFSET 4
> @@ -1326,7 +1305,9 @@ __rte_hash_lookup_bulk(const struct rte_hash *h,
> const void **keys,
>  int32_t i;
>  int32_t ret;
>  uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
> -uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
> +uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
> +uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
> +uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
>  const struct rte_hash_bucket
> *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
>  const struct rte_hash_bucket
> *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
>  uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; @@ -
> 1345,10 +1326,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h,
> const void **keys,
>  rte_prefetch0(keys[i + PREFETCH_OFFSET]);
>
>  prim_hash[i] = rte_hash_hash(h, keys[i]);
> -sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
> +get_buckets_index(h, prim_hash[i],
> +&prim_index[i], &sec_index[i], &sig[i]);
>
> -primary_bkt[i] = &h->buckets[prim_hash[i] & h-
> >bucket_bitmask];
> -secondary_bkt[i] = &h->buckets[sec_hash[i] & h-
> >bucket_bitmask];
> +primary_bkt[i] = &h->buckets[prim_index[i]];
> +secondary_bkt[i] = &h->buckets[sec_index[i]];
>
>  rte_prefetch0(primary_bkt[i]);
>  rte_prefetch0(secondary_bkt[i]);
> @@ -1357,10 +1339,12 @@ __rte_hash_lookup_bulk(const struct rte_hash
> *h, const void **keys,
>  /* Calculate and prefetch rest of the buckets */
>  for (; i < num_keys; i++) {
>  prim_hash[i] = rte_hash_hash(h, keys[i]);
> -sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
>
> -primary_bkt[i] = &h->buckets[prim_hash[i] & h-
> >bucket_bitmask];
> -secondary_bkt[i] = &h->buckets[sec_hash[i] & h-
> >bucket_bitmask];
> +get_buckets_index(h, prim_hash[i],
> +&prim_index[i], &sec_index[i], &sig[i]);
> +
> +primary_bkt[i] = &h->buckets[prim_index[i]];
> +secondary_bkt[i] = &h->buckets[sec_index[i]];
>
>  rte_prefetch0(primary_bkt[i]);
>  rte_prefetch0(secondary_bkt[i]);
> @@ -1371,10 +1355,11 @@ __rte_hash_lookup_bulk(const struct rte_hash
> *h, const void **keys,
>  for (i = 0; i < num_keys; i++) {
>  compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
>  primary_bkt[i], secondary_bkt[i],
> -prim_hash[i], sec_hash[i], h->sig_cmp_fn);
> +sig[i], h->sig_cmp_fn);
>
>  if (prim_hitmask[i]) {
> -uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
> +uint32_t first_hit =
> +__builtin_ctzl(prim_hitmask[i]) >> 1;
>  uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
>  const struct rte_hash_key *key_slot =
>  (const struct rte_hash_key *)(
> @@ -1385,7 +1370,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h,
> const void **keys,
>  }
>
>  if (sec_hitmask[i]) {
> -uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
> +uint32_t first_hit =
> +__builtin_ctzl(sec_hitmask[i]) >> 1;
>  uint32_t key_idx = secondary_bkt[i]-
> >key_idx[first_hit];
>  const struct rte_hash_key *key_slot =
>  (const struct rte_hash_key *)(
> @@ -1399,7 +1385,8 @@ __rte_hash_lookup_bulk(const struct rte_hash *h,
> const void **keys,
>  for (i = 0; i < num_keys; i++) {
>  positions[i] = -ENOENT;
>  while (prim_hitmask[i]) {
> -uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
> +uint32_t hit_index =
> +__builtin_ctzl(prim_hitmask[i]) >> 1;
>
>  uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
>  const struct rte_hash_key *key_slot = @@ -1418,11
> +1405,12 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void
> **keys,
>  positions[i] = key_idx - 1;
>  goto next_key;
>  }
> -prim_hitmask[i] &= ~(1 << (hit_index));
> +prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
>  }
>
>  while (sec_hitmask[i]) {
> -uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
> +uint32_t hit_index =
> +__builtin_ctzl(sec_hitmask[i]) >> 1;
>
>  uint32_t key_idx = secondary_bkt[i]-
> >key_idx[hit_index];
>  const struct rte_hash_key *key_slot = @@ -1442,7
> +1430,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void
> **keys,
>  positions[i] = key_idx - 1;
>  goto next_key;
>  }
> -sec_hitmask[i] &= ~(1 << (hit_index));
> +sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
>  }
>
>  next_key:
> @@ -1465,10 +1453,10 @@ __rte_hash_lookup_bulk(const struct rte_hash
> *h, const void **keys,
>  FOR_EACH_BUCKET(cur_bkt, next_bkt) {
>  if (data != NULL)
>  ret = search_one_bucket(h, keys[i],
> -sec_hash[i], &data[i],
> cur_bkt);
> +sig[i], &data[i], cur_bkt);
>  else
>  ret = search_one_bucket(h, keys[i],
> -sec_hash[i], NULL, cur_bkt);
> +sig[i], NULL, cur_bkt);
>  if (ret != -1) {
>  positions[i] = ret;
>  hits |= 1ULL << i;
> diff --git a/lib/librte_hash/rte_cuckoo_hash.h
> b/lib/librte_hash/rte_cuckoo_hash.h
> index e601520..7753cd8 100644
> --- a/lib/librte_hash/rte_cuckoo_hash.h
> +++ b/lib/librte_hash/rte_cuckoo_hash.h
> @@ -129,18 +129,15 @@ struct rte_hash_key {  enum
> rte_hash_sig_compare_function {
>  RTE_HASH_COMPARE_SCALAR = 0,
>  RTE_HASH_COMPARE_SSE,
> -RTE_HASH_COMPARE_AVX2,
>  RTE_HASH_COMPARE_NUM
>  };
>
>  /** Bucket structure */
>  struct rte_hash_bucket {
> -hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
> +uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES];
>
>  uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
>
> -hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
> -
>  uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
>
>  void *next;
> @@ -193,6 +190,7 @@ struct rte_hash {
>
>  struct queue_node {
>  struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
> +uint32_t cur_bkt_idx;
>
>  struct queue_node *prev;     /* Parent(bucket) in search path */
>  int prev_slot;               /* Parent(slot) in search path */
> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index
> 11d8e28..0bd7696 100644
> --- a/lib/librte_hash/rte_hash.h
> +++ b/lib/librte_hash/rte_hash.h
> @@ -40,7 +40,10 @@ extern "C" {
>  /** Flag to indicate the extendabe bucket table feature should be used */
> #define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08
>
> -/** Signature of key that is stored internally. */
> +/**
> + * A hash value that is used to generate signature stored in table and
> +the
> + * location the signature is stored.
> + */
This is an external file. This documentation goes into the API guide. IMO, we should change the comment to help the user. How about changing this to 'hash value of the key'?

>  typedef uint32_t hash_sig_t;
>
>  /** Type of function that can be used for calculating the hash value. */
> --
> 2.7.4

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
  
Wang, Yipeng1 Sept. 29, 2018, 12:55 a.m. UTC | #2
>-----Original Message-----
>From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]
>Sent: Wednesday, September 26, 2018 9:24 PM
>To: Wang, Yipeng1 <yipeng1.wang@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>
>Cc: dev@dpdk.org; michel@digirati.com.br
>Subject: RE: [PATCH v2 7/7] hash: use partial-key hashing
>
>
>> +static inline void
>> +get_buckets_index(const struct rte_hash *h, const hash_sig_t hash,
>> +uint32_t *prim_bkt, uint32_t *sec_bkt, uint16_t *sig) {
>> +/*
>> + * We use higher 16 bits of hash as the signature value stored in table.
>> + * We use the lower bits for the primary bucket
>> + * location. Then we XOR primary bucket location and the signature
>> + * to get the secondary bucket location. This is same as
>> + * proposed in Bin Fan, et al's paper
>> + * "MemC3: Compact and Concurrent MemCache with Dumber
>> Caching and
>> + * Smarter Hashing". The benefit to use
>> + * XOR is that one could derive the alternative bucket location
>> + * by only using the current bucket location and the signature.
>> + */
>> +*sig = hash >> 16;
>> +
>> +*prim_bkt = hash & h->bucket_bitmask;
>> +*sec_bkt =  (*prim_bkt ^ *sig) & h->bucket_bitmask; }
>> +
>IMO, this function can be split into 2 - one for primary bucket index and another for secondary bucket index. The secondary bucket
>index calculation function can be used in functions ' rte_hash_cuckoo_move_insert_mw' and ' rte_hash_cuckoo_make_space_mw'.
>
[Wang, Yipeng] I agree that breaking them down and using function calls instead of explicit code will be easier for future extension,
e.g. changing the algorithm.
I split the function into 3 in V4. Please check it out.

>> -/** Signature of key that is stored internally. */
>> +/**
>> + * A hash value that is used to generate signature stored in table and
>> +the
>> + * location the signature is stored.
>> + */
>This is an external file. This documentation goes into the API guide. IMO, we should change the comment to help the user. How about
>changing this to 'hash value of the key'?
>
[Wang, Yipeng] Improved in V4. Please check! Thanks!
  

Patch

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 616900b..5108ff0 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -90,6 +90,27 @@  rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
 		return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
 }
 
+static inline void
+get_buckets_index(const struct rte_hash *h, const hash_sig_t hash,
+		uint32_t *prim_bkt, uint32_t *sec_bkt, uint16_t *sig)
+{
+	/*
+	 * We use higher 16 bits of hash as the signature value stored in table.
+	 * We use the lower bits for the primary bucket
+	 * location. Then we XOR primary bucket location and the signature
+	 * to get the secondary bucket location. This is same as
+	 * proposed in Bin Fan, et al's paper
+	 * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
+	 * Smarter Hashing". The benefit to use
+	 * XOR is that one could derive the alternative bucket location
+	 * by only using the current bucket location and the signature.
+	 */
+	*sig = hash >> 16;
+
+	*prim_bkt = hash & h->bucket_bitmask;
+	*sec_bkt =  (*prim_bkt ^ *sig) & h->bucket_bitmask;
+}
+
 struct rte_hash *
 rte_hash_create(const struct rte_hash_parameters *params)
 {
@@ -327,9 +348,7 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	h->ext_table_support = ext_table_support;
 
 #if defined(RTE_ARCH_X86)
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
-		h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
-	else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
 		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
 	else
 #endif
@@ -416,18 +435,6 @@  rte_hash_hash(const struct rte_hash *h, const void *key)
 	return h->hash_func(key, h->key_len, h->hash_func_init_val);
 }
 
-/* Calc the secondary hash value from the primary hash value of a given key */
-static inline hash_sig_t
-rte_hash_secondary_hash(const hash_sig_t primary_hash)
-{
-	static const unsigned all_bits_shift = 12;
-	static const unsigned alt_bits_xor = 0x5bd1e995;
-
-	uint32_t tag = primary_hash >> all_bits_shift;
-
-	return primary_hash ^ ((tag + 1) * alt_bits_xor);
-}
-
 int32_t
 rte_hash_count(const struct rte_hash *h)
 {
@@ -558,14 +565,13 @@  enqueue_slot_back(const struct rte_hash *h,
 /* Search a key from bucket and update its data */
 static inline int32_t
 search_and_update(const struct rte_hash *h, void *data, const void *key,
-	struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
+	struct rte_hash_bucket *bkt, uint16_t sig)
 {
 	int i;
 	struct rte_hash_key *k, *keys = h->key_store;
 
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (bkt->sig_current[i] == sig &&
-				bkt->sig_alt[i] == alt_hash) {
+		if (bkt->sig_current[i] == sig) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -592,7 +598,7 @@  rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
 		struct rte_hash_bucket *prim_bkt,
 		struct rte_hash_bucket *sec_bkt,
 		const struct rte_hash_key *key, void *data,
-		hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+		uint16_t sig, uint32_t new_idx,
 		int32_t *ret_val)
 {
 	unsigned int i;
@@ -603,7 +609,7 @@  rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
 	/* Check if key was inserted after last check but before this
 	 * protected region in case of inserting duplicated keys.
 	 */
-	ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, prim_bkt, sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		*ret_val = ret;
@@ -611,7 +617,7 @@  rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
 	}
 
 	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
-		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		ret = search_and_update(h, data, key, cur_bkt, sig);
 		if (ret != -1) {
 			__hash_rw_writer_unlock(h);
 			*ret_val = ret;
@@ -626,7 +632,6 @@  rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
 		/* Check if slot is available */
 		if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
 			prim_bkt->sig_current[i] = sig;
-			prim_bkt->sig_alt[i] = alt_hash;
 			prim_bkt->key_idx[i] = new_idx;
 			break;
 		}
@@ -651,7 +656,7 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 			struct rte_hash_bucket *alt_bkt,
 			const struct rte_hash_key *key, void *data,
 			struct queue_node *leaf, uint32_t leaf_slot,
-			hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+			uint16_t sig, uint32_t new_idx,
 			int32_t *ret_val)
 {
 	uint32_t prev_alt_bkt_idx;
@@ -672,7 +677,7 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 	/* Check if key was inserted after last check but before this
 	 * protected region.
 	 */
-	ret = search_and_update(h, data, key, bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, bkt, sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		*ret_val = ret;
@@ -680,7 +685,7 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 	}
 
 	FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
-		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		ret = search_and_update(h, data, key, cur_bkt, sig);
 		if (ret != -1) {
 			__hash_rw_writer_unlock(h);
 			*ret_val = ret;
@@ -693,8 +698,9 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 		prev_bkt = prev_node->bkt;
 		prev_slot = curr_node->prev_slot;
 
-		prev_alt_bkt_idx =
-			prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
+		prev_alt_bkt_idx = (prev_node->cur_bkt_idx ^
+				prev_bkt->sig_current[prev_slot]) &
+				h->bucket_bitmask;
 
 		if (unlikely(&h->buckets[prev_alt_bkt_idx]
 				!= curr_bkt)) {
@@ -708,10 +714,8 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 		 * Cuckoo insert to move elements back to its
 		 * primary bucket if available
 		 */
-		curr_bkt->sig_alt[curr_slot] =
-			 prev_bkt->sig_current[prev_slot];
 		curr_bkt->sig_current[curr_slot] =
-			prev_bkt->sig_alt[prev_slot];
+			prev_bkt->sig_current[prev_slot];
 		curr_bkt->key_idx[curr_slot] =
 			prev_bkt->key_idx[prev_slot];
 
@@ -721,7 +725,6 @@  rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
 	}
 
 	curr_bkt->sig_current[curr_slot] = sig;
-	curr_bkt->sig_alt[curr_slot] = alt_hash;
 	curr_bkt->key_idx[curr_slot] = new_idx;
 
 	__hash_rw_writer_unlock(h);
@@ -739,39 +742,44 @@  rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
 			struct rte_hash_bucket *bkt,
 			struct rte_hash_bucket *sec_bkt,
 			const struct rte_hash_key *key, void *data,
-			hash_sig_t sig, hash_sig_t alt_hash,
+			uint16_t sig, uint32_t bucket_idx,
 			uint32_t new_idx, int32_t *ret_val)
 {
 	unsigned int i;
 	struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
 	struct queue_node *tail, *head;
 	struct rte_hash_bucket *curr_bkt, *alt_bkt;
+	uint32_t cur_idx, alt_idx;
 
 	tail = queue;
 	head = queue + 1;
 	tail->bkt = bkt;
 	tail->prev = NULL;
 	tail->prev_slot = -1;
+	tail->cur_bkt_idx = bucket_idx;
 
 	/* Cuckoo bfs Search */
 	while (likely(tail != head && head <
 					queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
 					RTE_HASH_BUCKET_ENTRIES)) {
 		curr_bkt = tail->bkt;
+		cur_idx = tail->cur_bkt_idx;
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 			if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
 				int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
 						bkt, sec_bkt, key, data,
-						tail, i, sig, alt_hash,
+						tail, i, sig,
 						new_idx, ret_val);
 				if (likely(ret != -1))
 					return ret;
 			}
 
 			/* Enqueue new node and keep prev node info */
-			alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
-						    & h->bucket_bitmask]);
+			alt_idx = (curr_bkt->sig_current[i] ^ cur_idx) &
+							h->bucket_bitmask;
+			alt_bkt = &(h->buckets[alt_idx]);
 			head->bkt = alt_bkt;
+			head->cur_bkt_idx = alt_idx;
 			head->prev = tail;
 			head->prev_slot = i;
 			head++;
@@ -786,7 +794,7 @@  static inline int32_t
 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 						hash_sig_t sig, void *data)
 {
-	hash_sig_t alt_hash;
+	uint16_t short_sig;
 	uint32_t prim_bucket_idx, sec_bucket_idx;
 	struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
 	struct rte_hash_key *new_k, *keys = h->key_store;
@@ -801,18 +809,15 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	int32_t ret_val;
 	struct rte_hash_bucket *last;
 
-	prim_bucket_idx = sig & h->bucket_bitmask;
+	get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig);
 	prim_bkt = &h->buckets[prim_bucket_idx];
-	rte_prefetch0(prim_bkt);
-
-	alt_hash = rte_hash_secondary_hash(sig);
-	sec_bucket_idx = alt_hash & h->bucket_bitmask;
 	sec_bkt = &h->buckets[sec_bucket_idx];
+	rte_prefetch0(prim_bkt);
 	rte_prefetch0(sec_bkt);
 
 	/* Check if key is already inserted in primary location */
 	__hash_rw_writer_lock(h);
-	ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, prim_bkt, short_sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		return ret;
@@ -820,12 +825,13 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is already inserted in secondary location */
 	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
-		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		ret = search_and_update(h, data, key, cur_bkt, short_sig);
 		if (ret != -1) {
 			__hash_rw_writer_unlock(h);
 			return ret;
 		}
 	}
+
 	__hash_rw_writer_unlock(h);
 
 	/* Did not find a match, so get a new slot for storing the new key */
@@ -863,7 +869,7 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Find an empty slot and insert */
 	ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
-					sig, alt_hash, new_idx, &ret_val);
+					short_sig, new_idx, &ret_val);
 	if (ret == 0)
 		return new_idx - 1;
 	else if (ret == 1) {
@@ -873,7 +879,7 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Primary bucket full, need to make space for new entry */
 	ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
-					sig, alt_hash, new_idx, &ret_val);
+				short_sig, prim_bucket_idx, new_idx, &ret_val);
 	if (ret == 0)
 		return new_idx - 1;
 	else if (ret == 1) {
@@ -883,7 +889,7 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Also search secondary bucket to get better occupancy */
 	ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
-					alt_hash, sig, new_idx, &ret_val);
+				short_sig, sec_bucket_idx, new_idx, &ret_val);
 
 	if (ret == 0)
 		return new_idx - 1;
@@ -903,14 +909,14 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	 */
 	__hash_rw_writer_lock(h);
 	/* We check for duplicates again since could be inserted before the lock */
-	ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+	ret = search_and_update(h, data, key, prim_bkt, short_sig);
 	if (ret != -1) {
 		enqueue_slot_back(h, cached_free_slots, slot_id);
 		goto failure;
 	}
 
 	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
-		ret = search_and_update(h, data, key, cur_bkt, alt_hash, sig);
+		ret = search_and_update(h, data, key, cur_bkt, short_sig);
 		if (ret != -1) {
 			enqueue_slot_back(h, cached_free_slots, slot_id);
 			goto failure;
@@ -923,8 +929,7 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 			/* Check if slot is available */
 			if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
-				cur_bkt->sig_current[i] = alt_hash;
-				cur_bkt->sig_alt[i] = sig;
+				cur_bkt->sig_current[i] = short_sig;
 				cur_bkt->key_idx[i] = new_idx;
 				__hash_rw_writer_unlock(h);
 				return new_idx - 1;
@@ -942,8 +947,7 @@  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
 	/* Use the first location of the new bucket */
-	(h->buckets_ext[bkt_id]).sig_current[0] = alt_hash;
-	(h->buckets_ext[bkt_id]).sig_alt[0] = sig;
+	(h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
 	(h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
 	/* Link the new bucket to sec bucket linked list */
 	last = rte_hash_get_last_bkt(sec_bkt);
@@ -1002,7 +1006,7 @@  rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
 
 /* Search one bucket to find the match key */
 static inline int32_t
-search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
+search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
 			void **data, const struct rte_hash_bucket *bkt)
 {
 	int i;
@@ -1031,30 +1035,28 @@  static inline int32_t
 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 					hash_sig_t sig, void **data)
 {
-	uint32_t bucket_idx;
-	hash_sig_t alt_hash;
+	uint32_t prim_bucket_idx, sec_bucket_idx;
 	struct rte_hash_bucket *bkt, *cur_bkt;
 	int ret;
+	uint16_t short_sig;
 
-	bucket_idx = sig & h->bucket_bitmask;
-	bkt = &h->buckets[bucket_idx];
+	get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig);
+	bkt = &h->buckets[prim_bucket_idx];
 
 	__hash_rw_reader_lock(h);
 
 	/* Check if key is in primary location */
-	ret = search_one_bucket(h, key, sig, data, bkt);
+	ret = search_one_bucket(h, key, short_sig, data, bkt);
 	if (ret != -1) {
 		__hash_rw_reader_unlock(h);
 		return ret;
 	}
 	/* Calculate secondary hash */
-	alt_hash = rte_hash_secondary_hash(sig);
-	bucket_idx = alt_hash & h->bucket_bitmask;
-	bkt = &h->buckets[bucket_idx];
+	bkt = &h->buckets[sec_bucket_idx];
 
 	/* Check if key is in secondary location */
 	FOR_EACH_BUCKET(cur_bkt, bkt) {
-		ret = search_one_bucket(h, key, alt_hash, data, cur_bkt);
+		ret = search_one_bucket(h, key, short_sig, data, cur_bkt);
 		if (ret != -1) {
 			__hash_rw_reader_unlock(h);
 			return ret;
@@ -1101,7 +1103,6 @@  remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
 	struct lcore_cache *cached_free_slots;
 
 	bkt->sig_current[i] = NULL_SIGNATURE;
-	bkt->sig_alt[i] = NULL_SIGNATURE;
 	if (h->multi_writer_support) {
 		lcore_id = rte_lcore_id();
 		cached_free_slots = &h->local_free_slots[lcore_id];
@@ -1126,7 +1127,7 @@  remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
 /* Search one bucket and remove the matched key */
 static inline int32_t
 search_and_remove(const struct rte_hash *h, const void *key,
-			struct rte_hash_bucket *bkt, hash_sig_t sig)
+			struct rte_hash_bucket *bkt, uint16_t sig)
 {
 	struct rte_hash_key *k, *keys = h->key_store;
 	unsigned int i;
@@ -1158,31 +1159,29 @@  static inline int32_t
 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 						hash_sig_t sig)
 {
-	uint32_t bucket_idx;
-	hash_sig_t alt_hash;
+	uint32_t prim_bucket_idx, sec_bucket_idx;
 	struct rte_hash_bucket *prim_bkt, *sec_bkt;
 	struct rte_hash_bucket *cur_bkt, *prev_bkt, *next_bkt;
 	int32_t ret, i;
 	struct rte_hash_bucket *tobe_removed_bkt = NULL;
+	uint16_t short_sig;
 
-	bucket_idx = sig & h->bucket_bitmask;
-	prim_bkt = &h->buckets[bucket_idx];
+	get_buckets_index(h, sig, &prim_bucket_idx, &sec_bucket_idx, &short_sig);
+	prim_bkt = &h->buckets[prim_bucket_idx];
 
 	__hash_rw_writer_lock(h);
 	/* look for key in primary bucket */
-	ret = search_and_remove(h, key, prim_bkt, sig);
+	ret = search_and_remove(h, key, prim_bkt, short_sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		return ret;
 	}
 
 	/* Calculate secondary hash */
-	alt_hash = rte_hash_secondary_hash(sig);
-	bucket_idx = alt_hash & h->bucket_bitmask;
-	sec_bkt = &h->buckets[bucket_idx];
+	sec_bkt = &h->buckets[sec_bucket_idx];
 
 	/* look for key in secondary bucket */
-	ret = search_and_remove(h, key, sec_bkt, alt_hash);
+	ret = search_and_remove(h, key, sec_bkt, short_sig);
 	if (ret != -1) {
 		__hash_rw_writer_unlock(h);
 		return ret;
@@ -1192,7 +1191,7 @@  __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 	if (h->ext_table_support) {
 		next_bkt = sec_bkt->next;
 		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
-			ret = search_and_remove(h, key, cur_bkt, alt_hash);
+			ret = search_and_remove(h, key, cur_bkt, short_sig);
 			if (ret != -1)
 				goto return_bkt;
 		}
@@ -1265,55 +1264,35 @@  static inline void
 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 			const struct rte_hash_bucket *prim_bkt,
 			const struct rte_hash_bucket *sec_bkt,
-			hash_sig_t prim_hash, hash_sig_t sec_hash,
+			uint16_t sig,
 			enum rte_hash_sig_compare_function sig_cmp_fn)
 {
 	unsigned int i;
 
+	/* For match mask the first bit of every two bits indicates the match */
 	switch (sig_cmp_fn) {
-#ifdef RTE_MACHINE_CPUFLAG_AVX2
-	case RTE_HASH_COMPARE_AVX2:
-		*prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
-				_mm256_load_si256(
-					(__m256i const *)prim_bkt->sig_current),
-				_mm256_set1_epi32(prim_hash)));
-		*sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
-				_mm256_load_si256(
-					(__m256i const *)sec_bkt->sig_current),
-				_mm256_set1_epi32(sec_hash)));
-		break;
-#endif
 #ifdef RTE_MACHINE_CPUFLAG_SSE2
 	case RTE_HASH_COMPARE_SSE:
-		/* Compare the first 4 signatures in the bucket */
-		*prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+		/* Compare all signatures in the bucket */
+		*prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
 				_mm_load_si128(
 					(__m128i const *)prim_bkt->sig_current),
-				_mm_set1_epi32(prim_hash)));
-		*prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
-				_mm_load_si128(
-					(__m128i const *)&prim_bkt->sig_current[4]),
-				_mm_set1_epi32(prim_hash)))) << 4;
-		/* Compare the first 4 signatures in the bucket */
-		*sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+				_mm_set1_epi16(sig)));
+		/* Compare all signatures in the bucket */
+		*sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
 				_mm_load_si128(
 					(__m128i const *)sec_bkt->sig_current),
-				_mm_set1_epi32(sec_hash)));
-		*sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
-				_mm_load_si128(
-					(__m128i const *)&sec_bkt->sig_current[4]),
-				_mm_set1_epi32(sec_hash)))) << 4;
+				_mm_set1_epi16(sig)));
 		break;
 #endif
 	default:
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 			*prim_hash_matches |=
-				((prim_hash == prim_bkt->sig_current[i]) << i);
+				((sig == prim_bkt->sig_current[i]) << (i << 1));
 			*sec_hash_matches |=
-				((sec_hash == sec_bkt->sig_current[i]) << i);
+				((sig == sec_bkt->sig_current[i]) << (i << 1));
 		}
 	}
-
 }
 
 #define PREFETCH_OFFSET 4
@@ -1326,7 +1305,9 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 	int32_t i;
 	int32_t ret;
 	uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
-	uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+	uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
 	const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
 	const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
 	uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
@@ -1345,10 +1326,11 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		rte_prefetch0(keys[i + PREFETCH_OFFSET]);
 
 		prim_hash[i] = rte_hash_hash(h, keys[i]);
-		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
+		get_buckets_index(h, prim_hash[i],
+				&prim_index[i], &sec_index[i], &sig[i]);
 
-		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
-		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
+		primary_bkt[i] = &h->buckets[prim_index[i]];
+		secondary_bkt[i] = &h->buckets[sec_index[i]];
 
 		rte_prefetch0(primary_bkt[i]);
 		rte_prefetch0(secondary_bkt[i]);
@@ -1357,10 +1339,12 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 	/* Calculate and prefetch rest of the buckets */
 	for (; i < num_keys; i++) {
 		prim_hash[i] = rte_hash_hash(h, keys[i]);
-		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
-		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
+		get_buckets_index(h, prim_hash[i],
+				&prim_index[i], &sec_index[i], &sig[i]);
+
+		primary_bkt[i] = &h->buckets[prim_index[i]];
+		secondary_bkt[i] = &h->buckets[sec_index[i]];
 
 		rte_prefetch0(primary_bkt[i]);
 		rte_prefetch0(secondary_bkt[i]);
@@ -1371,10 +1355,11 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 	for (i = 0; i < num_keys; i++) {
 		compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
 				primary_bkt[i], secondary_bkt[i],
-				prim_hash[i], sec_hash[i], h->sig_cmp_fn);
+				sig[i], h->sig_cmp_fn);
 
 		if (prim_hitmask[i]) {
-			uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
+			uint32_t first_hit =
+					__builtin_ctzl(prim_hitmask[i]) >> 1;
 			uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
 			const struct rte_hash_key *key_slot =
 				(const struct rte_hash_key *)(
@@ -1385,7 +1370,8 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		}
 
 		if (sec_hitmask[i]) {
-			uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
+			uint32_t first_hit =
+					__builtin_ctzl(sec_hitmask[i]) >> 1;
 			uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
 			const struct rte_hash_key *key_slot =
 				(const struct rte_hash_key *)(
@@ -1399,7 +1385,8 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 	for (i = 0; i < num_keys; i++) {
 		positions[i] = -ENOENT;
 		while (prim_hitmask[i]) {
-			uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
+			uint32_t hit_index =
+					__builtin_ctzl(prim_hitmask[i]) >> 1;
 
 			uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
 			const struct rte_hash_key *key_slot =
@@ -1418,11 +1405,12 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 				positions[i] = key_idx - 1;
 				goto next_key;
 			}
-			prim_hitmask[i] &= ~(1 << (hit_index));
+			prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
 		}
 
 		while (sec_hitmask[i]) {
-			uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
+			uint32_t hit_index =
+					__builtin_ctzl(sec_hitmask[i]) >> 1;
 
 			uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
 			const struct rte_hash_key *key_slot =
@@ -1442,7 +1430,7 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 				positions[i] = key_idx - 1;
 				goto next_key;
 			}
-			sec_hitmask[i] &= ~(1 << (hit_index));
+			sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
 		}
 
 next_key:
@@ -1465,10 +1453,10 @@  __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
 			if (data != NULL)
 				ret = search_one_bucket(h, keys[i],
-						sec_hash[i], &data[i], cur_bkt);
+						sig[i], &data[i], cur_bkt);
 			else
 				ret = search_one_bucket(h, keys[i],
-						sec_hash[i], NULL, cur_bkt);
+						sig[i], NULL, cur_bkt);
 			if (ret != -1) {
 				positions[i] = ret;
 				hits |= 1ULL << i;
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index e601520..7753cd8 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -129,18 +129,15 @@  struct rte_hash_key {
 enum rte_hash_sig_compare_function {
 	RTE_HASH_COMPARE_SCALAR = 0,
 	RTE_HASH_COMPARE_SSE,
-	RTE_HASH_COMPARE_AVX2,
 	RTE_HASH_COMPARE_NUM
 };
 
 /** Bucket structure */
 struct rte_hash_bucket {
-	hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
+	uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES];
 
 	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
 
-	hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
-
 	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
 
 	void *next;
@@ -193,6 +190,7 @@  struct rte_hash {
 
 struct queue_node {
 	struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
+	uint32_t cur_bkt_idx;
 
 	struct queue_node *prev;     /* Parent(bucket) in search path */
 	int prev_slot;               /* Parent(slot) in search path */
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 11d8e28..0bd7696 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -40,7 +40,10 @@  extern "C" {
 /** Flag to indicate the extendabe bucket table feature should be used */
 #define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08
 
-/** Signature of key that is stored internally. */
+/**
+ * A hash value that is used to generate signature stored in table and the
+ * location the signature is stored.
+ */
 typedef uint32_t hash_sig_t;
 
 /** Type of function that can be used for calculating the hash value. */