diff mbox series

[v2,2/3] hash: add predictable RSS implementation

Message ID 1617738643-258635-3-git-send-email-vladimir.medvedkin@intel.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers show
Series Predictable RSS feature | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Vladimir Medvedkin April 6, 2021, 7:50 p.m. UTC
This patch implements predictable RSS functionality.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/rte_thash.c | 577 ++++++++++++++++++++++++++++++++++++++++++--
 lib/librte_hash/rte_thash.h |  42 ++++
 lib/librte_hash/version.map |   1 +
 3 files changed, 602 insertions(+), 18 deletions(-)

Comments

Ananyev, Konstantin April 7, 2021, 12:53 p.m. UTC | #1
Hi Vladimir,

A few comments below, mostly minor.
One generic one - the doc seems to be missing.
With that in place:
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 
> This patch implements predictable RSS functionality.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/librte_hash/rte_thash.c | 577 ++++++++++++++++++++++++++++++++++++++++++--
>  lib/librte_hash/rte_thash.h |  42 ++++
>  lib/librte_hash/version.map |   1 +
>  3 files changed, 602 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
> index 79e8724..cc60ada 100644
> --- a/lib/librte_hash/rte_thash.c
> +++ b/lib/librte_hash/rte_thash.c
> @@ -12,6 +12,45 @@
>  #include <rte_malloc.h>
> 
>  #define THASH_NAME_LEN		64
> +#define TOEPLITZ_HASH_LEN	32
> +
> +#define	RETA_SZ_MIN	2U
> +#define	RETA_SZ_MAX	16U

Should these RETA_SZ defines be in a public header?
So the user can know what the allowed values are?

> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
> +					(reta_sz <= RETA_SZ_MAX))
> +
> +TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
> +static struct rte_tailq_elem rte_thash_tailq = {
> +	.name = "RTE_THASH",
> +};
> +EAL_REGISTER_TAILQ(rte_thash_tailq)
> +
> +/**
> + * Table of some irreducible polynomials over GF(2).
> + * For lfsr they are represented in BE bit order, and
> + * x^0 is masked out.
> + * For example, poly x^5 + x^2 + 1 will be represented
> + * as (101001b & 11111b) = 01001b = 0x9
> + */
> +static const uint32_t irreducible_poly_table[][4] = {
> +	{0, 0, 0, 0},	/** < degree 0 */
> +	{1, 1, 1, 1},	/** < degree 1 */
> +	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
> +	{0x5, 0x3, 0x5, 0x3},
> +	{0x9, 0x3, 0x9, 0x3},
> +	{0x9, 0x1b, 0xf, 0x5},
> +	{0x21, 0x33, 0x1b, 0x2d},
> +	{0x41, 0x11, 0x71, 0x9},
> +	{0x71, 0xa9, 0xf5, 0x8d},
> +	{0x21, 0xd1, 0x69, 0x1d9},
> +	{0x81, 0x2c1, 0x3b1, 0x185},
> +	{0x201, 0x541, 0x341, 0x461},
> +	{0x941, 0x609, 0xe19, 0x45d},
> +	{0x1601, 0x1f51, 0x1171, 0x359},
> +	{0x2141, 0x2111, 0x2db1, 0x2109},
> +	{0x4001, 0x801, 0x101, 0x7301},
> +	{0x7781, 0xa011, 0x4211, 0x86d9},
> +};
> 
>  struct thash_lfsr {
>  	uint32_t	ref_cnt;
> @@ -31,8 +70,10 @@ struct rte_thash_subtuple_helper {
>  	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
>  	LIST_ENTRY(rte_thash_subtuple_helper)	next;
>  	struct thash_lfsr	*lfsr;
> -	uint32_t	offset;		/** < Offset in bits of the subtuple */
> -	uint32_t	len;		/** < Length in bits of the subtuple */
> +	uint32_t	offset;		/** < Offset of the m-sequence */
> +	uint32_t	len;		/** < Length of the m-sequence */
> +	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
> +	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
>  	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
>  	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
>  	/** < Complimentary table */
> @@ -48,49 +89,549 @@ struct rte_thash_ctx {
>  	uint8_t		hash_key[0];
>  };
> 
> +static inline uint32_t
> +get_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	/*
> +	 * masking the TAP bits defined by the polynomial and
> +	 * calculating parity
> +	 */
> +	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
> +	ret = lfsr->state & 0x1;
> +	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +get_rev_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
> +	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
> +	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +thash_get_rand_poly(uint32_t poly_degree)
> +{
> +	return irreducible_poly_table[poly_degree][rte_rand() %
> +		RTE_DIM(irreducible_poly_table[poly_degree])];
> +}
> +
> +static struct thash_lfsr *
> +alloc_lfsr(struct rte_thash_ctx *ctx)
> +{
> +	struct thash_lfsr *lfsr;
> +	uint32_t i;
> +
> +	if (ctx == NULL)
> +		return NULL;
> +
> +	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
> +	if (lfsr == NULL)
> +		return NULL;
> +
> +	lfsr->deg = ctx->reta_sz_log;
> +	lfsr->poly = thash_get_rand_poly(lfsr->deg);
> +	do {
> +		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
> +	} while (lfsr->state == 0);
> +	/* init reverse order polynomial */
> +	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
> +	/* init proper rev_state*/
> +	lfsr->rev_state = lfsr->state;
> +	for (i = 0; i <= lfsr->deg; i++)
> +		get_rev_bit_lfsr(lfsr);
> +
> +	/* clear bits_cnt after rev_state was inited */
> +	lfsr->bits_cnt = 0;
> +	lfsr->ref_cnt = 1;
> +
> +	return lfsr;
> +}
> +
> +static void
> +attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt++;
> +	h->lfsr = lfsr;
> +}
> +
> +static void
> +free_lfsr(struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt--;
> +	if (lfsr->ref_cnt == 0)
> +		rte_free(lfsr);
> +}
> +
>  struct rte_thash_ctx *
> -rte_thash_init_ctx(const char *name __rte_unused,
> -	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
> -	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> +	uint8_t *key, uint32_t flags)
>  {
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	uint32_t i;

Empty line is missing.

> +	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* guarantee there's no existing */
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +	ctx = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	/* allocate tailq entry */
> +	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH,
> +			"Can not allocate tailq entry for thash context %s\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto exit;
> +	}
> +
> +	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
> +	if (ctx == NULL) {
> +		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto free_te;
> +	}
> +
> +	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
> +	ctx->key_len = key_len;
> +	ctx->reta_sz_log = reta_sz;
> +	LIST_INIT(&ctx->head);
> +	ctx->flags = flags;
> +
> +	if (key)
> +		rte_memcpy(ctx->hash_key, key, key_len);
> +	else {
> +		for (i = 0; i < key_len; i++)
> +			ctx->hash_key[i] = rte_rand();
> +	}
> +
> +	te->data = (void *)ctx;
> +	TAILQ_INSERT_TAIL(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ctx;
> +free_te:
> +	rte_free(te);
> +exit:
> +	rte_mcfg_tailq_write_unlock();
>  	return NULL;
>  }
> 
>  struct rte_thash_ctx *
> -rte_thash_find_existing(const char *name __rte_unused)
> +rte_thash_find_existing(const char *name)
>  {
> -	return NULL;
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return ctx;
>  }
> 
>  void
> -rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx)
>  {
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	struct rte_thash_subtuple_helper *ent, *tmp;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		if (te->data == (void *)ctx)
> +			break;
> +	}
> +
> +	if (te != NULL)
> +		TAILQ_REMOVE(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +	ent = LIST_FIRST(&(ctx->head));
> +	while (ent) {
> +		free_lfsr(ent->lfsr);
> +		tmp = ent;
> +		ent = LIST_NEXT(ent, next);
> +		LIST_REMOVE(tmp, next);
> +		rte_free(tmp);
> +	}
> +
> +	rte_free(ctx);
> +	rte_free(te);
> +}
> +
> +static inline void
> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> +{
> +	uint32_t byte_idx = pos >> 3;

Just as a nit to be consistent with the line below:
pos / CHAR_BIT; 

> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp &= ~(1 << bit_idx);
> +	tmp |= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +/**
> + * writes m-sequence to the hash_key for range [start, end]
> + * (i.e. including start and end positions)
> + */
> +static int
> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
> +	uint32_t start, uint32_t end)
> +{
> +	uint32_t i;
> +	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
> +	req_bits++; /* due to including end */
> +
> +	/* check if the lfsr overflows the period of the m-sequence */
> +	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
> +			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
> +			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
> +		return -ENOSPC;
> +
> +	if (start < end) {
> +		/* original direction (from left to right)*/
> +		for (i = start; i <= end; i++)
> +			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
> +
> +	} else {
> +		/* reverse direction (from right to left) */
> +		for (i = end; i >= start; i--)
> +			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
> +	}
> +
> +	return 0;
> +}
> +
> +static inline uint32_t
> +get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
> +{
> +	uint32_t *tmp, val;
> +
> +	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
> +	val = rte_be_to_cpu_32(*tmp);
> +	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
> +		ctx->reta_sz_log));
> +
> +	return val & ((1 << ctx->reta_sz_log) - 1);
> +}
> +
> +static inline void
> +generate_compliment_table(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *h)
> +{
> +	int i, j, k;
> +	uint32_t val;
> +	uint32_t start;
> +
> +	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
> +
> +	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
> +		val = 0;
> +		for (j = i; j; j &= (j - 1)) {
> +			k = rte_bsf32(j);
> +			val ^= get_subvalue(ctx, start - k +
> +				ctx->reta_sz_log - 1);
> +		}
> +		h->compl_table[val] = i;
> +	}
> +}
> +
> +static inline int
> +insert_before(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	uint32_t start, uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if (end < cur_ent->offset) {
> +		ent->lfsr = alloc_lfsr(ctx);
> +		if (ent->lfsr == NULL) {
> +			rte_free(ent);
> +			return -ENOMEM;
> +		}
> +		/* generate nonoverlapping range [start, end) */
> +		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -ENOSPC;
> +	}
> +	attach_lfsr(ent, cur_ent->lfsr);
> +
> +	/**
> +	 * generate partially overlapping range
> +	 * [start, cur_ent->start) in reverse order
> +	 */
> +	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_BEFORE(cur_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +	return 0;
> +}
> +
> +static inline int
> +insert_after(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	struct rte_thash_subtuple_helper *prev_ent,
> +	uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -EEXIST;
> +	}
> +
> +	attach_lfsr(ent, cur_ent->lfsr);
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_AFTER(prev_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
> +	return 0;
>  }
> 
>  int
> -rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused, uint32_t len __rte_unused,
> -	uint32_t offset __rte_unused)
> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
> +	uint32_t offset)
>  {
> +	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
> +	uint32_t start, end;
> +	int ret;
> +
> +	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
> +			((offset + len + TOEPLITZ_HASH_LEN - 1) >
> +			ctx->key_len * CHAR_BIT))
> +		return -EINVAL;
> +
> +	/* Check for existing name*/
> +	LIST_FOREACH(cur_ent, &ctx->head, next) {
> +		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
> +			return -EEXIST;
> +	}
> +
> +	end = offset + len + TOEPLITZ_HASH_LEN - 1;
> +	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
> +		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
> +		offset;
> +
> +	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> +		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);

Helper can be used by data-path code (via rte_thash_get_compliment()), right?
Then it might be better to align it at a cache line.
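Something like this perhaps (just a sketch; the third argument of
rte_zmalloc() is the requested alignment):

	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
		sizeof(uint32_t) * (1 << ctx->reta_sz_log),
		RTE_CACHE_LINE_SIZE);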

> +	if (ent == NULL)
> +		return -ENOMEM;
> +
> +	rte_strlcpy(ent->name, name, sizeof(ent->name));
> +	ent->offset = start;
> +	ent->len = end - start;
> +	ent->tuple_offset = offset;
> +	ent->tuple_len = len;
> +	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
> +
> +	cur_ent = LIST_FIRST(&ctx->head);
> +	while (cur_ent) {
> +		uint32_t range_end = cur_ent->offset + cur_ent->len;
> +		next_ent = LIST_NEXT(cur_ent, next);
> +		prev_ent = cur_ent;
> +		/* Iterate through overlapping ranges */
> +		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
> +			range_end = RTE_MAX(next_ent->offset + next_ent->len,
> +				range_end);
> +			if (start > next_ent->offset)
> +				prev_ent = next_ent;
> +
> +			next_ent = LIST_NEXT(next_ent, next);
> +		}
> +
> +		if (start < cur_ent->offset)
> +			return insert_before(ctx, ent, cur_ent, next_ent,
> +				start, end, range_end);
> +		else if (start < range_end)
> +			return insert_after(ctx, ent, cur_ent, next_ent,
> +				prev_ent, end, range_end);
> +
> +		cur_ent = next_ent;
> +		continue;
> +	}
> +
> +	ent->lfsr = alloc_lfsr(ctx);
> +	if (ent->lfsr == NULL) {
> +		rte_free(ent);
> +		return -ENOMEM;
> +	}
> +
> +	/* generate nonoverlapping range [start, end) */
> +	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +	if (LIST_EMPTY(&ctx->head)) {
> +		LIST_INSERT_HEAD(&ctx->head, ent, next);
> +	} else {
> +		LIST_FOREACH(next_ent, &ctx->head, next)
> +			prev_ent = next_ent;
> +
> +		LIST_INSERT_AFTER(prev_ent, ent, next);
> +	}
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
>  	return 0;
>  }
> 
>  struct rte_thash_subtuple_helper *
> -rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused)
> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
>  {
> +	struct rte_thash_subtuple_helper *ent;
> +
> +	if ((ctx == NULL) || (name == NULL))
> +		return NULL;
> +
> +	LIST_FOREACH(ent, &ctx->head, next) {
> +		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
> +			return ent;
> +	}
> +
>  	return NULL;
>  }
> 
>  uint32_t
> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
> -	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> +	uint32_t hash, uint32_t desired_hash)
>  {
> -	return 0;
> +	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>  }

Would it make sense to add another one for multiple values:
rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
so the user can get adjustment values for multiple queues at once?
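Just a sketch of how it could look (the name and signature are only a
suggestion, reusing the existing complement table lookup):

static inline void
rte_thash_get_compliment_bulk(struct rte_thash_subtuple_helper *h,
	uint32_t hash, const uint32_t desired_hashes[],
	uint32_t adj_hash[], uint32_t num)
{
	uint32_t i;

	/* one complement table lookup per desired hash value */
	for (i = 0; i < num; i++)
		adj_hash[i] = h->compl_table[(hash ^ desired_hashes[i]) &
			h->lsb_msk];
}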

> 
>  const uint8_t *
> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>  {
> -	return NULL;
> +	return ctx->hash_key;
> +}
> +
> +static inline void
> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> +{
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp ^= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata)
> +{
> +	unsigned i;
> +
> +	if ((h == NULL) || (orig_tuple == NULL))
> +		return -EINVAL;
> +
> +	adj_bits &= h->lsb_msk;
> +	/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
> +	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
> +		uint8_t bit = (adj_bits >> i) & 0x1;
> +		if (bit)
> +			xor_bit(orig_tuple, bit,
> +				h->tuple_offset + h->tuple_len - 1 - i);
> +	}
> +
> +	if (fn != NULL)
> +		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
> +
> +	return 0;
>  }

Not sure there is much point in having a callback that is called only once.
Might be better to rework the function in a way that the user provides 2 callbacks -
one to generate a new value, the second to check.
Something like that:

int
rte_thash_gen_tuple(struct rte_thash_subtuple_helper *h,
	uint8_t *tuple, uint32_t desired_hash,
	int (*cb_gen_tuple)(uint8_t *, void *),
	int (*cb_check_tuple)(const uint8_t *, void *),
	void *userdata) 
{
	do {
		rc = cb_gen_tuple(tuple, userdata);
		if (rc != 0)
			return rc;
		hash = rte_softrss(tuple, ...);
		adj = rte_thash_get_compliment(h, hash, desired_hash);
		update_tuple(tuple, adj, ...);
		rc = cb_check_tuple(tuple, userdata); 
	} while(rc != 0);

	return rc;
}

> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
> index 38a641b..fd67931 100644
> --- a/lib/librte_hash/rte_thash.h
> +++ b/lib/librte_hash/rte_thash.h
> @@ -360,6 +360,48 @@ __rte_experimental
>  const uint8_t *
>  rte_thash_get_key(struct rte_thash_ctx *ctx);
> 
> +/**
> + * Function prototype for the rte_thash_adjust_tuple
> + * to check if adjusted tuple could be used.
> + * Generally it is some kind of lookup function to check
> + * if adjusted tuple is already in use.
> + *
> + * @param userdata
> + *  Pointer to the userdata. It could be a pointer to the
> + *  table with used tuples to search.
> + * @param tuple
> + *  Pointer to the tuple to check
> + *
> + * @return
> + *  1 on success
> + *  0 otherwise
> + */
> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
> +
> +/**
> + * Adjust tuple with complimentary bits.
> + *
> + * @param h
> + *  Pointer to the helper struct
> + * @param orig_tuple
> + *  Pointer to the tuple to be adjusted
> + * @param adj_bits
> + *  Valure returned by rte_thash_get_compliment
> + * @param fn
> + *  Callback function to check adjusted tuple. Could be NULL
> + * @param userdata
> + *  Pointer to the userdata to be passed to fn(). Could be NULL
> + *
> + * @return
> + *  0 on success
> + *  negative otherwise
> + */
> +__rte_experimental
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
> index 93cb230..a992a1e 100644
> --- a/lib/librte_hash/version.map
> +++ b/lib/librte_hash/version.map
> @@ -32,6 +32,7 @@ DPDK_21 {
>  EXPERIMENTAL {
>  	global:
> 
> +	rte_thash_adjust_tuple;
>  	rte_hash_free_key_with_position;
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
> --
> 2.7.4
Wang, Yipeng1 April 10, 2021, 12:10 a.m. UTC | #2
> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Tuesday, April 6, 2021 12:51 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
> <sameh.gobriel@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: [PATCH v2 2/3] hash: add predictable RSS implementation
> 
> This patch implements predictable RSS functionality.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/librte_hash/rte_thash.c | 577
> ++++++++++++++++++++++++++++++++++++++++++--
>  lib/librte_hash/rte_thash.h |  42 ++++
>  lib/librte_hash/version.map |   1 +
>  3 files changed, 602 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c index
> 79e8724..cc60ada 100644
> --- a/lib/librte_hash/rte_thash.c
> +++ b/lib/librte_hash/rte_thash.c
> @@ -12,6 +12,45 @@
>  #include <rte_malloc.h>
> 
>  #define THASH_NAME_LEN		64
> +#define TOEPLITZ_HASH_LEN	32
> +
> +#define	RETA_SZ_MIN	2U
> +#define	RETA_SZ_MAX	16U
> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN)
> && \
> +					(reta_sz <= RETA_SZ_MAX))
> +
> +TAILQ_HEAD(rte_thash_list, rte_tailq_entry); static struct
> +rte_tailq_elem rte_thash_tailq = {
> +	.name = "RTE_THASH",
> +};
> +EAL_REGISTER_TAILQ(rte_thash_tailq)
> +
> +/**
> + * Table of some irreducible polynomials over GF(2).
> + * For lfsr they are represented in BE bit order, and
> + * x^0 is masked out.
> + * For example, poly x^5 + x^2 + 1 will be represented
> + * as (101001b & 11111b) = 01001b = 0x9
> + */
> +static const uint32_t irreducible_poly_table[][4] = {
> +	{0, 0, 0, 0},	/** < degree 0 */
> +	{1, 1, 1, 1},	/** < degree 1 */
> +	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
> +	{0x5, 0x3, 0x5, 0x3},
> +	{0x9, 0x3, 0x9, 0x3},
> +	{0x9, 0x1b, 0xf, 0x5},
> +	{0x21, 0x33, 0x1b, 0x2d},
> +	{0x41, 0x11, 0x71, 0x9},
> +	{0x71, 0xa9, 0xf5, 0x8d},
> +	{0x21, 0xd1, 0x69, 0x1d9},
> +	{0x81, 0x2c1, 0x3b1, 0x185},
> +	{0x201, 0x541, 0x341, 0x461},
> +	{0x941, 0x609, 0xe19, 0x45d},
> +	{0x1601, 0x1f51, 0x1171, 0x359},
> +	{0x2141, 0x2111, 0x2db1, 0x2109},
> +	{0x4001, 0x801, 0x101, 0x7301},
> +	{0x7781, 0xa011, 0x4211, 0x86d9},
> +};
> 
>  struct thash_lfsr {
>  	uint32_t	ref_cnt;
> @@ -31,8 +70,10 @@ struct rte_thash_subtuple_helper {
>  	char	name[THASH_NAME_LEN];	/** < Name of subtuple
> configuration */
>  	LIST_ENTRY(rte_thash_subtuple_helper)	next;
>  	struct thash_lfsr	*lfsr;
> -	uint32_t	offset;		/** < Offset in bits of the subtuple */
> -	uint32_t	len;		/** < Length in bits of the subtuple
> */
> +	uint32_t	offset;		/** < Offset of the m-sequence */
> +	uint32_t	len;		/** < Length of the m-sequence */
> +	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
> +	uint32_t	tuple_len;	/** < Length in bits of the subtuple
> */
>  	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
>  	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
>  	/** < Complimentary table */
> @@ -48,49 +89,549 @@ struct rte_thash_ctx {
>  	uint8_t		hash_key[0];
>  };
> 
> +static inline uint32_t
> +get_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	/*
> +	 * masking the TAP bits defined by the polynomial and
> +	 * calculating parity
> +	 */
> +	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
> +	ret = lfsr->state & 0x1;
> +	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +get_rev_bit_lfsr(struct thash_lfsr *lfsr) {
> +	uint32_t bit, ret;
> +
> +	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
> +	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
> +	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +thash_get_rand_poly(uint32_t poly_degree) {
> +	return irreducible_poly_table[poly_degree][rte_rand() %
> +		RTE_DIM(irreducible_poly_table[poly_degree])];
> +}
> +
> +static struct thash_lfsr *
> +alloc_lfsr(struct rte_thash_ctx *ctx)
> +{
> +	struct thash_lfsr *lfsr;
> +	uint32_t i;
> +
> +	if (ctx == NULL)
> +		return NULL;
> +
> +	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
> +	if (lfsr == NULL)
> +		return NULL;
> +
> +	lfsr->deg = ctx->reta_sz_log;
> +	lfsr->poly = thash_get_rand_poly(lfsr->deg);
> +	do {
> +		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
> +	} while (lfsr->state == 0);
> +	/* init reverse order polynomial */
> +	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
> +	/* init proper rev_state*/
> +	lfsr->rev_state = lfsr->state;
> +	for (i = 0; i <= lfsr->deg; i++)
> +		get_rev_bit_lfsr(lfsr);
> +
> +	/* clear bits_cnt after rev_state was inited */
> +	lfsr->bits_cnt = 0;
> +	lfsr->ref_cnt = 1;
> +
> +	return lfsr;
> +}
> +
> +static void
> +attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr
> +*lfsr) {
> +	lfsr->ref_cnt++;
> +	h->lfsr = lfsr;
> +}
> +
> +static void
> +free_lfsr(struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt--;
> +	if (lfsr->ref_cnt == 0)
> +		rte_free(lfsr);
> +}
> +
>  struct rte_thash_ctx *
> -rte_thash_init_ctx(const char *name __rte_unused,
> -	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
> -	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> +	uint8_t *key, uint32_t flags)
>  {
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	uint32_t i;
> +	if ((name == NULL) || (key_len == 0)
> || !RETA_SZ_IN_RANGE(reta_sz)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* guarantee there's no existing */
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +	ctx = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	/* allocate tailq entry */
> +	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH,
> +			"Can not allocate tailq entry for thash context %s\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto exit;
> +	}
> +
> +	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
> +	if (ctx == NULL) {
> +		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation
> failed\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto free_te;
> +	}
> +
> +	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
> +	ctx->key_len = key_len;
> +	ctx->reta_sz_log = reta_sz;
> +	LIST_INIT(&ctx->head);
> +	ctx->flags = flags;
> +
> +	if (key)
> +		rte_memcpy(ctx->hash_key, key, key_len);
> +	else {
> +		for (i = 0; i < key_len; i++)
> +			ctx->hash_key[i] = rte_rand();
> +	}
> +
> +	te->data = (void *)ctx;
> +	TAILQ_INSERT_TAIL(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ctx;
> +free_te:
> +	rte_free(te);
> +exit:
> +	rte_mcfg_tailq_write_unlock();
>  	return NULL;
>  }
> 
>  struct rte_thash_ctx *
> -rte_thash_find_existing(const char *name __rte_unused)
> +rte_thash_find_existing(const char *name)
>  {
> -	return NULL;
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return ctx;
>  }
> 
>  void
> -rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx)
>  {
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	struct rte_thash_subtuple_helper *ent, *tmp;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		if (te->data == (void *)ctx)
> +			break;
> +	}
> +
> +	if (te != NULL)
> +		TAILQ_REMOVE(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +	ent = LIST_FIRST(&(ctx->head));
> +	while (ent) {
> +		free_lfsr(ent->lfsr);
> +		tmp = ent;
> +		ent = LIST_NEXT(ent, next);
> +		LIST_REMOVE(tmp, next);
> +		rte_free(tmp);
> +	}
> +
> +	rte_free(ctx);
> +	rte_free(te);
> +}
> +
> +static inline void
> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) {
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp &= ~(1 << bit_idx);
> +	tmp |= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +/**
> + * writes m-sequence to the hash_key for range [start, end]
> + * (i.e. including start and end positions)
> + */
> +static int
> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
> +	uint32_t start, uint32_t end)
> +{
> +	uint32_t i;
> +	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
> +	req_bits++; /* due to including end */
> +
> +	/* check if the lfsr overflows the period of the m-sequence */
> +	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
> +			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
> +			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
> +		return -ENOSPC;
[Wang, Yipeng] 
If there is no space, should one increase lfsr->deg? Or if it is already the highest degree you predefined, then what should one do?
Maybe a log message could help the user with more information on the solutions.
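E.g. something like (the message text is hypothetical, just to illustrate):

	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
			RTE_THASH_IGNORE_PERIOD_OVERFLOW)) {
		RTE_LOG(ERR, HASH,
			"requested %u bits exceed the m-sequence period (2^%u - 1), "
			"consider the RTE_THASH_IGNORE_PERIOD_OVERFLOW flag\n",
			req_bits, lfsr->deg);
		return -ENOSPC;
	}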
> +
> +	if (start < end) {
> +		/* original direction (from left to right)*/
> +		for (i = start; i <= end; i++)
> +			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
> +
> +	} else {
> +		/* reverse direction (from right to left) */
> +		for (i = end; i >= start; i--)
> +			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
> +	}
> +
> +	return 0;
> +}
> +
> +static inline uint32_t
> +get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset) {
> +	uint32_t *tmp, val;
> +
> +	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
> +	val = rte_be_to_cpu_32(*tmp);
> +	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
> +		ctx->reta_sz_log));
> +
> +	return val & ((1 << ctx->reta_sz_log) - 1); }
> +
> +static inline void
> +generate_compliment_table(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *h)
> +{
> +	int i, j, k;
> +	uint32_t val;
> +	uint32_t start;
> +
> +	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
> +
> +	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
> +		val = 0;
> +		for (j = i; j; j &= (j - 1)) {
> +			k = rte_bsf32(j);
> +			val ^= get_subvalue(ctx, start - k +
> +				ctx->reta_sz_log - 1);
> +		}
> +		h->compl_table[val] = i;
> +	}
> +}
> +
> +static inline int
> +insert_before(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	uint32_t start, uint32_t end, uint32_t range_end) {
> +	int ret;
> +
> +	if (end < cur_ent->offset) {
> +		ent->lfsr = alloc_lfsr(ctx);
> +		if (ent->lfsr == NULL) {
> +			rte_free(ent);
> +			return -ENOMEM;
> +		}
> +		/* generate nonoverlapping range [start, end) */
> +		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -ENOSPC;
> +	}
> +	attach_lfsr(ent, cur_ent->lfsr);
> +
> +	/**
> +	 * generate partially overlapping range
> +	 * [start, cur_ent->start) in reverse order
> +	 */
> +	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_BEFORE(cur_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +	return 0;
> +}
> +
> +static inline int
> +insert_after(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	struct rte_thash_subtuple_helper *prev_ent,
> +	uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -EEXIST;
> +	}
> +
> +	attach_lfsr(ent, cur_ent->lfsr);
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_AFTER(prev_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
> +	return 0;
>  }
> 
>  int
> -rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused, uint32_t len __rte_unused,
> -	uint32_t offset __rte_unused)
> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name,
> uint32_t len,
> +	uint32_t offset)
>  {
> +	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent,
> *next_ent;
> +	uint32_t start, end;
> +	int ret;
> +
> +	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
> +			((offset + len + TOEPLITZ_HASH_LEN - 1) >
> +			ctx->key_len * CHAR_BIT))
> +		return -EINVAL;
> +
> +	/* Check for existing name*/
> +	LIST_FOREACH(cur_ent, &ctx->head, next) {
> +		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name))
> == 0)
> +			return -EEXIST;
> +	}
> +
> +	end = offset + len + TOEPLITZ_HASH_LEN - 1;
> +	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
> +		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log -
> 1)) :
> +		offset;
> +
> +	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> +		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> +	if (ent == NULL)
> +		return -ENOMEM;
> +
> +	rte_strlcpy(ent->name, name, sizeof(ent->name));
> +	ent->offset = start;
> +	ent->len = end - start;
> +	ent->tuple_offset = offset;
> +	ent->tuple_len = len;
> +	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
> +
> +	cur_ent = LIST_FIRST(&ctx->head);
> +	while (cur_ent) {
> +		uint32_t range_end = cur_ent->offset + cur_ent->len;
> +		next_ent = LIST_NEXT(cur_ent, next);
> +		prev_ent = cur_ent;
> +		/* Iterate through overlapping ranges */
> +		while ((next_ent != NULL) && (next_ent->offset <
> range_end)) {
> +			range_end = RTE_MAX(next_ent->offset +
> next_ent->len,
> +				range_end);
> +			if (start > next_ent->offset)
> +				prev_ent = next_ent;
> +
> +			next_ent = LIST_NEXT(next_ent, next);
> +		}
> +
> +		if (start < cur_ent->offset)
> +			return insert_before(ctx, ent, cur_ent, next_ent,
> +				start, end, range_end);
> +		else if (start < range_end)
> +			return insert_after(ctx, ent, cur_ent, next_ent,
> +				prev_ent, end, range_end);
> +
> +		cur_ent = next_ent;
> +		continue;
> +	}
> +
> +	ent->lfsr = alloc_lfsr(ctx);
> +	if (ent->lfsr == NULL) {
> +		rte_free(ent);
> +		return -ENOMEM;
> +	}
> +
> +	/* generate nonoverlapping range [start, end) */
> +	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +	if (LIST_EMPTY(&ctx->head)) {
> +		LIST_INSERT_HEAD(&ctx->head, ent, next);
> +	} else {
> +		LIST_FOREACH(next_ent, &ctx->head, next)
> +			prev_ent = next_ent;
> +
> +		LIST_INSERT_AFTER(prev_ent, ent, next);
> +	}
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
>  	return 0;
>  }
> 
>  struct rte_thash_subtuple_helper *
> -rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused)
> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
>  {
> +	struct rte_thash_subtuple_helper *ent;
> +
> +	if ((ctx == NULL) || (name == NULL))
> +		return NULL;
> +
> +	LIST_FOREACH(ent, &ctx->head, next) {
> +		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
> +			return ent;
> +	}
> +
>  	return NULL;
>  }
> 
>  uint32_t
> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h
> __rte_unused,
> -	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> +	uint32_t hash, uint32_t desired_hash)
>  {
> -	return 0;
> +	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>  }
> 
>  const uint8_t *
> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>  {
> -	return NULL;
> +	return ctx->hash_key;
> +}
> +
> +static inline void
> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) {
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp ^= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata) {
> +	unsigned i;
> +
> +	if ((h == NULL) || (orig_tuple == NULL))
> +		return -EINVAL;
> +
> +	adj_bits &= h->lsb_msk;
> +	/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
> +	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
> +		uint8_t bit = (adj_bits >> i) & 0x1;
> +		if (bit)
> +			xor_bit(orig_tuple, bit,
> +				h->tuple_offset + h->tuple_len - 1 - i);
> +	}
> +
> +	if (fn != NULL)
> +		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
> +
> +	return 0;
>  }
> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index
> 38a641b..fd67931 100644
> --- a/lib/librte_hash/rte_thash.h
> +++ b/lib/librte_hash/rte_thash.h
> @@ -360,6 +360,48 @@ __rte_experimental
>  const uint8_t *
>  rte_thash_get_key(struct rte_thash_ctx *ctx);
> 
> +/**
> + * Function prototype for the rte_thash_adjust_tuple
> + * to check if adjusted tuple could be used.
> + * Generally it is some kind of lookup function to check
> + * if adjusted tuple is already in use.
> + *
> + * @param userdata
> + *  Pointer to the userdata. It could be a pointer to the
> + *  table with used tuples to search.
> + * @param tuple
> + *  Pointer to the tuple to check
> + *
> + * @return
> + *  1 on success
> + *  0 otherwise
> + */
> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
> +
> +/**
> + * Adjust tuple with complimentary bits.
> + *
[Wang, Yipeng] 
More explanation for this API is needed.
My understanding is that the user should call this function in a loop until
the above callback function returns success and thus this function succeeds.
BTW, why not put this API in the first API commit?

> + * @param h
> + *  Pointer to the helper struct
> + * @param orig_tuple
> + *  Pointer to the tuple to be adjusted
> + * @param adj_bits
> + *  Valure returned by rte_thash_get_compliment
[Wang, Yipeng] typo. *value
> + * @param fn
> + *  Callback function to check adjusted tuple. Could be NULL
> + * @param userdata
> + *  Pointer to the userdata to be passed to fn(). Could be NULL
> + *
> + * @return
> + *  0 on success
> + *  negative otherwise
> + */
> +__rte_experimental
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
> 93cb230..a992a1e 100644
> --- a/lib/librte_hash/version.map
> +++ b/lib/librte_hash/version.map
> @@ -32,6 +32,7 @@ DPDK_21 {
>  EXPERIMENTAL {
>  	global:
> 
> +	rte_thash_adjust_tuple;
>  	rte_hash_free_key_with_position;
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
> --
> 2.7.4
Vladimir Medvedkin April 11, 2021, 6:51 p.m. UTC | #3
Hi Konstantin,

Thanks for the review,

On 07/04/2021 15:53, Ananyev, Konstantin wrote:
> Hi Vladimir,
> 
> A few comments below, mostly minor.
> One generic one - the doc seems to be missing.
> With that in place:
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
>>
>> This patch implements predictable RSS functionality.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

<snip>

>> +#define RETA_SZ_MIN	2U
>> +#define RETA_SZ_MAX	16U
> 
> Should these RETA_SZ defines be in a public header?
> So the user can know what the allowed values are?
> 

I don't think this is necessary, because the user chooses it not 
arbitrarily, but depending on the NIC.
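E.g. (a sketch; it assumes the reta_size reported by the device is a 
power of 2):

	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	ctx = rte_thash_init_ctx("ctx_name", key_len,
		rte_log2_u32(dev_info.reta_size), NULL, 0);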

>> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \

<snip>

>> +uint32_t i;
> 
> Empty line is missing.
> 

Thanks

>> +if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
>> +rte_errno = EINVAL;
>> +return NULL;
>> +}

<snip>

>> +static inline void
>> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
>> +{
>> +uint32_t byte_idx = pos >> 3;
> 
> Just as a nit to be consistent with the line below:
> pos / CHAR_BIT;
> 

Fixed

>> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
>> +uint8_t tmp;

<snip>

>> +ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
>> +sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> 
> > Helper can be used by data-path code (via rte_thash_get_compliment()), right?
> > Then it might be better to align it at a cache line.
> 

Agree, I'll fix it

>> +if (ent == NULL)
>> +return -ENOMEM;

<snip>

>>   uint32_t
>> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
>> -uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
>> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
>> +uint32_t hash, uint32_t desired_hash)
>>   {
>> -return 0;
>> +return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>>   }
> 
> > Would it make sense to add another one for multiple values:
> > rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
> > so the user can get adjustment values for multiple queues at once?
> 

At the moment I can't find a scenario where we would need a bulk 
version of this function.

>>
>>   const uint8_t *
>> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
>> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>>   {
>> -return NULL;
>> +return ctx->hash_key;
>> +}
>> +
>> +static inline void
>> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
>> +{
>> +uint32_t byte_idx = pos >> 3;
>> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
>> +uint8_t tmp;
>> +
>> +tmp = ptr[byte_idx];
>> +tmp ^= bit << bit_idx;
>> +ptr[byte_idx] = tmp;
>> +}
>> +
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata)
>> +{
>> +unsigned i;
>> +
>> +if ((h == NULL) || (orig_tuple == NULL))
>> +return -EINVAL;
>> +
>> +adj_bits &= h->lsb_msk;
>> +/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
>> +for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
>> +uint8_t bit = (adj_bits >> i) & 0x1;
>> +if (bit)
>> +xor_bit(orig_tuple, bit,
>> +h->tuple_offset + h->tuple_len - 1 - i);
>> +}
>> +
>> +if (fn != NULL)
>> +return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
>> +
>> +return 0;
>>   }
> 
> Not sure there is much point in having a callback that is called only once.
> Might be better to rework the function in a way that the user provides 2 callbacks -
> one to generate a new value, the second to check.
> Something like that:
> 
> int
> rte_thash_gen_tuple(struct rte_thash_subtuple_helper *h,
> uint8_t *tuple, uint32_t desired_hash,
> int (*cb_gen_tuple)(uint8_t *, void *),
> int (*cb_check_tuple)(const uint8_t *, void *),
> void *userdata)
> {
> do {
> rc = cb_gen_tuple(tuple, userdata);
> if (rc != 0)
> return rc;
> hash = rte_softrss(tuple, ...);
> adj = rte_thash_get_compliment(h, hash, desired_hash);
> update_tuple(tuple, adj, ...);
> rc = cb_check_tuple(tuple, userdata);
> } while(rc != 0);
> 
> 	return rc;
> }

Agree, there is no point in calling the callback for a single function 
call. I'll rewrite rte_thash_adjust_tuple() and send a new version in 
v3. As for gen_tuple, I think we don't need a separate callback; the 
new rte_thash_adjust_tuple() implementation randomly changes the 
corresponding bits in the tuple (based on the offset and length 
configured in the helper).
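Roughly the following flow (only a sketch of the intended v3 behaviour, 
not the final code; apply_adj_bits() and randomize_subtuple() are 
hypothetical helpers for the bit manipulations described above):

int
rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
	struct rte_thash_subtuple_helper *h,
	uint8_t *tuple, unsigned int tuple_len,
	uint32_t desired_value, unsigned int attempts,
	rte_thash_check_tuple_t fn, void *userdata)
{
	uint32_t hash, adj_bits;
	unsigned int i;

	for (i = 0; i < attempts; i++) {
		hash = rte_softrss((uint32_t *)tuple, tuple_len / 4,
			rte_thash_get_key(ctx));
		adj_bits = rte_thash_get_compliment(h, hash, desired_value);
		/* xor the adjustment bits into the subtuple, as in v2 */
		apply_adj_bits(tuple, h, adj_bits);
		if ((fn == NULL) || (fn(userdata, tuple) == 1))
			return 0;
		/* tuple already in use - randomize the subtuple and retry */
		randomize_subtuple(tuple, h);
	}

	return -EEXIST;
}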

> 
>> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
>> index 38a641b..fd67931 100644
>> --- a/lib/librte_hash/rte_thash.h
>> +++ b/lib/librte_hash/rte_thash.h
>> @@ -360,6 +360,48 @@ __rte_experimental
>>   const uint8_t *
>>   rte_thash_get_key(struct rte_thash_ctx *ctx);
>>
>> +/**
>> + * Function prototype for the rte_thash_adjust_tuple
>> + * to check if adjusted tuple could be used.
>> + * Generally it is some kind of lookup function to check
>> + * if adjusted tuple is already in use.
>> + *
>> + * @param userdata
>> + *  Pointer to the userdata. It could be a pointer to the
>> + *  table with used tuples to search.
>> + * @param tuple
>> + *  Pointer to the tuple to check
>> + *
>> + * @return
>> + *  1 on success
>> + *  0 otherwise
>> + */
>> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
>> +
>> +/**
>> + * Adjust tuple with complimentary bits.
>> + *
>> + * @param h
>> + *  Pointer to the helper struct
>> + * @param orig_tuple
>> + *  Pointer to the tuple to be adjusted
>> + * @param adj_bits
>> + *  Valure returned by rte_thash_get_compliment
>> + * @param fn
>> + *  Callback function to check adjusted tuple. Could be NULL
>> + * @param userdata
>> + *  Pointer to the userdata to be passed to fn(). Could be NULL
>> + *
>> + * @return
>> + *  0 on success
>> + *  negative otherwise
>> + */
>> +__rte_experimental
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
>> index 93cb230..a992a1e 100644
>> --- a/lib/librte_hash/version.map
>> +++ b/lib/librte_hash/version.map
>> @@ -32,6 +32,7 @@ DPDK_21 {
>>   EXPERIMENTAL {
>>   global:
>>
>> +rte_thash_adjust_tuple;
>>   rte_hash_free_key_with_position;
>>   rte_hash_lookup_with_hash_bulk;
>>   rte_hash_lookup_with_hash_bulk_data;
>> --
>> 2.7.4
>
Vladimir Medvedkin April 11, 2021, 6:52 p.m. UTC | #4
On 10/04/2021 03:10, Wang, Yipeng1 wrote:
>> -----Original Message-----
>> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
>> Sent: Tuesday, April 6, 2021 12:51 PM
>> To: dev@dpdk.org
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
>> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
>> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
>> <sameh.gobriel@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>
>> Subject: [PATCH v2 2/3] hash: add predictable RSS implementation
>>
>> This patch implements predictable RSS functionality.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>   lib/librte_hash/rte_thash.c | 577
>> ++++++++++++++++++++++++++++++++++++++++++--
>>   lib/librte_hash/rte_thash.h |  42 ++++
>>   lib/librte_hash/version.map |   1 +
>>   3 files changed, 602 insertions(+), 18 deletions(-)
>>

<snip>

>> +/**
>> + * writes m-sequence to the hash_key for range [start, end]
>> + * (i.e. including start and end positions)
>> + */
>> +static int
>> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
>> +uint32_t start, uint32_t end)
>> +{
>> +uint32_t i;
>> +uint32_t req_bits = (start < end) ? (end - start) : (start - end);
>> +	req_bits++; /* due to including end */
>> +
>> +	/* check if the lfsr overflows the period of the m-sequence */
>> +	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
>> +			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
>> +			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
>> +		return -ENOSPC;
> [Wang, Yipeng]
> If there is no space, should one increase lfsr->deg? Or if it is already the highest degree you predefined, then what should one do?
> Maybe a log message could help the user with more information on the solutions.

It is not possible to increase the degree of the lfsr due to mathematical 
restrictions. It must be exactly equal to the number of bits for which 
we want to have a collision.
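For example, for reta_sz_log = 7 (a 128-entry RETA) the degree must be 
exactly 7, so the period of the m-sequence is 2^7 - 1 = 127 bits; 
requesting more key bits than that would make the sequence repeat, and 
the collision property would be lost whichever polynomial from the 
table is picked.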

>> +
>> +if (start < end) {
>> +/* original direction (from left to right)*/
>> +for (i = start; i <= end; i++)
>> +set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);

<snip>

>> +/**
>> + * Adjust tuple with complimentary bits.
>> + *
> [Wang, Yipeng]
> More explanation for this API is needed.
> My understanding is that the user should call this function in a loop until
> the above callback function returns success and thus this function succeeds.

I'm going to rewrite this function in v3. The loop will be internal.

> BTW, why not put this API in the first API commit?
> 

My fault, will fix this

>> + * @param h
>> + *  Pointer to the helper struct
>> + * @param orig_tuple
>> + *  Pointer to the tuple to be adjusted
>> + * @param adj_bits
>> + *  Valure returned by rte_thash_get_compliment
> [Wang, Yipeng] typo. *value
>> + * @param fn
>> + *  Callback function to check adjusted tuple. Could be NULL
>> + * @param userdata
>> + *  Pointer to the userdata to be passed to fn(). Could be NULL
>> + *
>> + * @return
>> + *  0 on success
>> + *  negative otherwise
>> + */
>> +__rte_experimental
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
>> 93cb230..a992a1e 100644
>> --- a/lib/librte_hash/version.map
>> +++ b/lib/librte_hash/version.map
>> @@ -32,6 +32,7 @@ DPDK_21 {
>>   EXPERIMENTAL {
>>   global:
>>
>> +rte_thash_adjust_tuple;
>>   rte_hash_free_key_with_position;
>>   rte_hash_lookup_with_hash_bulk;
>>   rte_hash_lookup_with_hash_bulk_data;
>> --
>> 2.7.4
>
Ananyev, Konstantin April 12, 2021, 9:47 a.m. UTC | #5
> 
> <snip>
> 
> >> +#define RETA_SZ_MIN	2U
> >> +#define RETA_SZ_MAX	16U
> >
> > Should these RETA_SZ defines be in public header?
> > So user can know what are allowed values?
> >
> 
> I don't think this is necessary, because the user chooses it not
> arbitrarily, but depending on the NIC.

Sure thing, but it would be good for the user to know what the SW
limitations on it are without digging through the .c.

> 
> >> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
> 
> <snip>
> 
> >> +uint32_t i;
> >
> > Empty line is  missing.
> >
> 
> Thanks
> 
> >> +if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
> >> +rte_errno = EINVAL;
> >> +return NULL;
> >> +}
> 
> <snip>
> 
> >> +static inline void
> >> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> >> +{
> >> +uint32_t byte_idx = pos >> 3;
> >
> > Just as a nit to be consistent with the line below:
> > pos / CHAR_BIT;
> >
> 
> Fixed
> 
> >> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> >> +uint8_t tmp;
> 
> <snip>
> 
> >> +ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> >> +sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> >
> > Helper can be used by data-path code (via rte_thash_get_compliment()) right?
> > Then might be better to align it at cache-line.
> >
> 
> Agree, I'll fix it
> 
> >> +if (ent == NULL)
> >> +return -ENOMEM;
> 
> <snip>
> 
> >>   uint32_t
> >> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
> >> -uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> >> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> >> +uint32_t hash, uint32_t desired_hash)
> >>   {
> >> -return 0;
> >> +return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
> >>   }
> >
> > Would it make sense to add another one for multiple values:
> > rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
> > so the user can get adjustment values for multiple queues at once?
> >
> 
> At the moment I can't find a scenario where we would need a bulk
> version of this function.

My thought was about the case when the number of configured
HW queues is less than reta_size.
Let's say reta_size==4, but the user configured only 3 queues and reta={0,1,2,0}.
In that case, for queue 0, both values 0 and 3 would suit.
Vladimir Medvedkin April 13, 2021, 12:28 p.m. UTC | #6
Hi Konstantin,

On 12/04/2021 12:47, Ananyev, Konstantin wrote:
>>
>> <snip>
>>
>>>> +#define RETA_SZ_MIN	2U
>>>> +#define RETA_SZ_MAX	16U
>>>
>>> Should these RETA_SZ defines be in a public header?
>>> So the user can know what the allowed values are?
>>>
>>
>> I don't think this is necessary, because the user chooses it not
>> arbitrarily, but depending on the NIC.
> 
> Sure thing, but it would be good for the user to know what the SW
> limitations on it are without digging through the .c.
> 

I see, I'll move it to .h
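E.g. something like this in rte_thash.h (just a sketch of the public 
names):

/** Minimum log2 size of the RSS redirection table */
#define RTE_THASH_RETA_SZ_MIN	2U
/** Maximum log2 size of the RSS redirection table */
#define RTE_THASH_RETA_SZ_MAX	16U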

>>
>>>> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
>>
>> <snip>
>>
>>>> +uint32_t i;
>>>
>>> Empty line is  missing.
>>>
>>
>> Thanks
>>
>>>> +if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
>>>> +rte_errno = EINVAL;
>>>> +return NULL;
>>>> +}
>>
>> <snip>
>>
>>>> +static inline void
>>>> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
>>>> +{
>>>> +uint32_t byte_idx = pos >> 3;
>>>
>>> Just as a nit to be consistent with the line below:
>>> pos / CHAR_BIT;
>>>
>>
>> Fixed
>>
>>>> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
>>>> +uint8_t tmp;
>>
>> <snip>
>>
>>>> +ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
>>>> +sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
>>>
>>> Helper can be used by data-path code (via rte_thash_get_compliment()) right?
>>> Then might be better to align it at cache-line.
>>>
>>
>> Agree, I'll fix it
>>
>>>> +if (ent == NULL)
>>>> +return -ENOMEM;
>>
>> <snip>
>>
>>>>    uint32_t
>>>> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
>>>> -uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
>>>> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
>>>> +uint32_t hash, uint32_t desired_hash)
>>>>    {
>>>> -return 0;
>>>> +return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>>>>    }
>>>
>>> Would it make sense to add another one for multiple values:
>>> rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
>>> so the user can get adjustment values for multiple queues at once?
>>>
>>
>> At the moment I can't find a scenario where we would need a bulk
>> version of this function
> 
> My thought was about the case when the number of configured
> HW queues is less than reta_size.
> Let's say reta_size == 4, but the user configured only 3 queues and reta = {0, 1, 2, 0}.
> In that case, for queue 0 both values 0 and 3 would suit.
> 

Ah, yes, I remember our discussion about this. I'll think about this and
add it in one of the next releases.
It should be something like:

int
rte_thash_get_compliment_reta(struct rte_thash_subtuple_helper *helper,
	uint32_t hash, const uint16_t queue_id,
	struct rte_eth_rss_reta_entry64 *reta, int reta_sz);

where reta is filled in by rte_eth_dev_rss_reta_query().
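Roughly, the semantics I have in mind would be something like the sketch
below (illustrative only, not a final implementation; it just returns the
adjustment for the first RETA entry that directs to queue_id, while the
real version may need to report all suitable adjustments):

int
rte_thash_get_compliment_reta(struct rte_thash_subtuple_helper *helper,
	uint32_t hash, const uint16_t queue_id,
	struct rte_eth_rss_reta_entry64 *reta, int reta_sz)
{
	int i, grp, pos;

	for (i = 0; i < reta_sz; i++) {
		grp = i / RTE_RETA_GROUP_SIZE;
		pos = i % RTE_RETA_GROUP_SIZE;
		/* skip entries not selected in the query mask */
		if (!(reta[grp].mask & (1ULL << pos)))
			continue;
		/* any RETA index that directs to queue_id will do */
		if (reta[grp].reta[pos] == queue_id)
			return rte_thash_get_compliment(helper, hash, i);
	}

	return -ENOENT;
}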
diff mbox series

Patch

diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
index 79e8724..cc60ada 100644
--- a/lib/librte_hash/rte_thash.c
+++ b/lib/librte_hash/rte_thash.c
@@ -12,6 +12,45 @@ 
 #include <rte_malloc.h>
 
 #define THASH_NAME_LEN		64
+#define TOEPLITZ_HASH_LEN	32
+
+#define	RETA_SZ_MIN	2U
+#define	RETA_SZ_MAX	16U
+#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
+					(reta_sz <= RETA_SZ_MAX))
+
+TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_thash_tailq = {
+	.name = "RTE_THASH",
+};
+EAL_REGISTER_TAILQ(rte_thash_tailq)
+
+/**
+ * Table of some irreducible polynomials over GF(2).
+ * For the LFSR they are represented in BE bit order, and
+ * x^0 is masked out.
+ * For example, poly x^5 + x^2 + 1 will be represented
+ * as (101001b & 11111b) = 01001b = 0x9
+ */
+static const uint32_t irreducible_poly_table[][4] = {
+	{0, 0, 0, 0},	/** < degree 0 */
+	{1, 1, 1, 1},	/** < degree 1 */
+	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
+	{0x5, 0x3, 0x5, 0x3},
+	{0x9, 0x3, 0x9, 0x3},
+	{0x9, 0x1b, 0xf, 0x5},
+	{0x21, 0x33, 0x1b, 0x2d},
+	{0x41, 0x11, 0x71, 0x9},
+	{0x71, 0xa9, 0xf5, 0x8d},
+	{0x21, 0xd1, 0x69, 0x1d9},
+	{0x81, 0x2c1, 0x3b1, 0x185},
+	{0x201, 0x541, 0x341, 0x461},
+	{0x941, 0x609, 0xe19, 0x45d},
+	{0x1601, 0x1f51, 0x1171, 0x359},
+	{0x2141, 0x2111, 0x2db1, 0x2109},
+	{0x4001, 0x801, 0x101, 0x7301},
+	{0x7781, 0xa011, 0x4211, 0x86d9},
+};
 
 struct thash_lfsr {
 	uint32_t	ref_cnt;
@@ -31,8 +70,10 @@  struct rte_thash_subtuple_helper {
 	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
 	LIST_ENTRY(rte_thash_subtuple_helper)	next;
 	struct thash_lfsr	*lfsr;
-	uint32_t	offset;		/** < Offset in bits of the subtuple */
-	uint32_t	len;		/** < Length in bits of the subtuple */
+	uint32_t	offset;		/** < Offset of the m-sequence */
+	uint32_t	len;		/** < Length of the m-sequence */
+	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
+	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
 	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
 	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
 	/** < Complementary table */
@@ -48,49 +89,549 @@  struct rte_thash_ctx {
 	uint8_t		hash_key[0];
 };
 
+static inline uint32_t
+get_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	/*
+	 * masking the TAP bits defined by the polynomial and
+	 * calculating parity
+	 */
+	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	ret = lfsr->state & 0x1;
+	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+get_rev_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
+	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+thash_get_rand_poly(uint32_t poly_degree)
+{
+	return irreducible_poly_table[poly_degree][rte_rand() %
+		RTE_DIM(irreducible_poly_table[poly_degree])];
+}
+
+static struct thash_lfsr *
+alloc_lfsr(struct rte_thash_ctx *ctx)
+{
+	struct thash_lfsr *lfsr;
+	uint32_t i;
+
+	if (ctx == NULL)
+		return NULL;
+
+	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
+	if (lfsr == NULL)
+		return NULL;
+
+	lfsr->deg = ctx->reta_sz_log;
+	lfsr->poly = thash_get_rand_poly(lfsr->deg);
+	do {
+		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
+	} while (lfsr->state == 0);
+	/* init reverse order polynomial */
+	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
+	/* init proper rev_state */
+	lfsr->rev_state = lfsr->state;
+	for (i = 0; i <= lfsr->deg; i++)
+		get_rev_bit_lfsr(lfsr);
+
+	/* clear bits_cnt after rev_state was initialized */
+	lfsr->bits_cnt = 0;
+	lfsr->ref_cnt = 1;
+
+	return lfsr;
+}
+
+static void
+attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt++;
+	h->lfsr = lfsr;
+}
+
+static void
+free_lfsr(struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt--;
+	if (lfsr->ref_cnt == 0)
+		rte_free(lfsr);
+}
+
 struct rte_thash_ctx *
-rte_thash_init_ctx(const char *name __rte_unused,
-	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
-	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags)
 {
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	uint32_t i;
+
+	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing context with the same name */
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	ctx = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH,
+			"Can not allocate tailq entry for thash context %s\n",
+			name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
+			name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
+	ctx->key_len = key_len;
+	ctx->reta_sz_log = reta_sz;
+	LIST_INIT(&ctx->head);
+	ctx->flags = flags;
+
+	if (key)
+		rte_memcpy(ctx->hash_key, key, key_len);
+	else {
+		for (i = 0; i < key_len; i++)
+			ctx->hash_key[i] = rte_rand();
+	}
+
+	te->data = (void *)ctx;
+	TAILQ_INSERT_TAIL(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ctx;
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
 
 struct rte_thash_ctx *
-rte_thash_find_existing(const char *name __rte_unused)
+rte_thash_find_existing(const char *name)
 {
-	return NULL;
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	rte_mcfg_tailq_read_unlock();
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return ctx;
 }
 
 void
-rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_free_ctx(struct rte_thash_ctx *ctx)
 {
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	struct rte_thash_subtuple_helper *ent, *tmp;
+
+	if (ctx == NULL)
+		return;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		if (te->data == (void *)ctx)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+	ent = LIST_FIRST(&(ctx->head));
+	while (ent) {
+		free_lfsr(ent->lfsr);
+		tmp = ent;
+		ent = LIST_NEXT(ent, next);
+		LIST_REMOVE(tmp, next);
+		rte_free(tmp);
+	}
+
+	rte_free(ctx);
+	rte_free(te);
+}
+
+static inline void
+set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos / CHAR_BIT;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp &= ~(1 << bit_idx);
+	tmp |= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+/**
+ * writes m-sequence to the hash_key for range [start, end]
+ * (i.e. including start and end positions)
+ */
+static int
+generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
+	uint32_t start, uint32_t end)
+{
+	uint32_t i;
+	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
+	req_bits++; /* due to including end */
+
+	/* check if the lfsr would overflow the period of the m-sequence */
+	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
+			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
+			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
+		return -ENOSPC;
+
+	if (start < end) {
+		/* original direction (from left to right) */
+		for (i = start; i <= end; i++)
+			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
+
+	} else {
+		/* reverse direction (from right to left) */
+		for (i = start; i > end; i--)
+			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
+
+		set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), end);
+	}
+
+	return 0;
+}
+
+static inline uint32_t
+get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
+{
+	uint32_t *tmp, val;
+
+	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
+	val = rte_be_to_cpu_32(*tmp);
+	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
+		ctx->reta_sz_log));
+
+	return val & ((1 << ctx->reta_sz_log) - 1);
+}
+
+static inline void
+generate_compliment_table(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h)
+{
+	int i, j, k;
+	uint32_t val;
+	uint32_t start;
+
+	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
+
+	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
+		val = 0;
+		for (j = i; j; j &= (j - 1)) {
+			k = rte_bsf32(j);
+			val ^= get_subvalue(ctx, start - k +
+				ctx->reta_sz_log - 1);
+		}
+		h->compl_table[val] = i;
+	}
+}
+
+static inline int
+insert_before(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	uint32_t start, uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if (end < cur_ent->offset) {
+		ent->lfsr = alloc_lfsr(ctx);
+		if (ent->lfsr == NULL) {
+			rte_free(ent);
+			return -ENOMEM;
+		}
+		/* generate nonoverlapping range [start, end) */
+		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+	attach_lfsr(ent, cur_ent->lfsr);
+
+	/**
+	 * generate partially overlapping range
+	 * [start, cur_ent->offset) in reverse order
+	 */
+	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_BEFORE(cur_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
+}
+
+static inline int
+insert_after(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	struct rte_thash_subtuple_helper *prev_ent,
+	uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -EEXIST;
+	}
+
+	attach_lfsr(ent, cur_ent->lfsr);
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_AFTER(prev_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
+	return 0;
 }
 
 int
-rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused, uint32_t len __rte_unused,
-	uint32_t offset __rte_unused)
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset)
 {
+	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
+	uint32_t start, end;
+	int ret;
+
+	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
+			((offset + len + TOEPLITZ_HASH_LEN - 1) >
+			ctx->key_len * CHAR_BIT))
+		return -EINVAL;
+
+	/* Check for an existing name */
+	LIST_FOREACH(cur_ent, &ctx->head, next) {
+		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
+			return -EEXIST;
+	}
+
+	end = offset + len + TOEPLITZ_HASH_LEN - 1;
+	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
+		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
+		offset;
+
+	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
+		sizeof(uint32_t) * (1 << ctx->reta_sz_log), RTE_CACHE_LINE_SIZE);
+	if (ent == NULL)
+		return -ENOMEM;
+
+	rte_strlcpy(ent->name, name, sizeof(ent->name));
+	ent->offset = start;
+	ent->len = end - start;
+	ent->tuple_offset = offset;
+	ent->tuple_len = len;
+	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
+
+	cur_ent = LIST_FIRST(&ctx->head);
+	while (cur_ent) {
+		uint32_t range_end = cur_ent->offset + cur_ent->len;
+		next_ent = LIST_NEXT(cur_ent, next);
+		prev_ent = cur_ent;
+		/* Iterate through overlapping ranges */
+		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
+			range_end = RTE_MAX(next_ent->offset + next_ent->len,
+				range_end);
+			if (start > next_ent->offset)
+				prev_ent = next_ent;
+
+			next_ent = LIST_NEXT(next_ent, next);
+		}
+
+		if (start < cur_ent->offset)
+			return insert_before(ctx, ent, cur_ent, next_ent,
+				start, end, range_end);
+		else if (start < range_end)
+			return insert_after(ctx, ent, cur_ent, next_ent,
+				prev_ent, end, range_end);
+
+		cur_ent = next_ent;
+		continue;
+	}
+
+	ent->lfsr = alloc_lfsr(ctx);
+	if (ent->lfsr == NULL) {
+		rte_free(ent);
+		return -ENOMEM;
+	}
+
+	/* generate nonoverlapping range [start, end) */
+	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+	if (LIST_EMPTY(&ctx->head)) {
+		LIST_INSERT_HEAD(&ctx->head, ent, next);
+	} else {
+		LIST_FOREACH(next_ent, &ctx->head, next)
+			prev_ent = next_ent;
+
+		LIST_INSERT_AFTER(prev_ent, ent, next);
+	}
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
 	return 0;
 }
 
 struct rte_thash_subtuple_helper *
-rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused)
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
 {
+	struct rte_thash_subtuple_helper *ent;
+
+	if ((ctx == NULL) || (name == NULL))
+		return NULL;
+
+	LIST_FOREACH(ent, &ctx->head, next) {
+		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
+			return ent;
+	}
+
 	return NULL;
 }
 
 uint32_t
-rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
-	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash)
 {
-	return 0;
+	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
 }
 
 const uint8_t *
-rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_get_key(struct rte_thash_ctx *ctx)
 {
-	return NULL;
+	return ctx->hash_key;
+}
+
+static inline void
+xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos / CHAR_BIT;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp ^= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+int
+rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
+	uint8_t *orig_tuple, uint32_t adj_bits,
+	rte_thash_check_tuple_t fn, void *userdata)
+{
+	unsigned int i;
+
+	if ((h == NULL) || (orig_tuple == NULL))
+		return -EINVAL;
+
+	adj_bits &= h->lsb_msk;
+	/* the LSB of adj_bits maps to the last bit of the subtuple */
+	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
+		uint8_t bit = (adj_bits >> i) & 0x1;
+		if (bit)
+			xor_bit(orig_tuple, bit,
+				h->tuple_offset + h->tuple_len - 1 - i);
+	}
+
+	if (fn != NULL)
+		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
+
+	return 0;
 }
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 38a641b..fd67931 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -360,6 +360,48 @@  __rte_experimental
 const uint8_t *
 rte_thash_get_key(struct rte_thash_ctx *ctx);
 
+/**
+ * Function prototype for the callback used by rte_thash_adjust_tuple()
+ * to check if an adjusted tuple could be used.
+ * Generally it is some kind of lookup function to check
+ * if the adjusted tuple is already in use.
+ *
+ * @param userdata
+ *  Pointer to the userdata. It could be a pointer to the
+ *  table with used tuples to search.
+ * @param tuple
+ *  Pointer to the tuple to check
+ *
+ * @return
+ *  1 if the adjusted tuple can be used
+ *  0 otherwise
+ */
+typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
+
+/**
+ * Adjust tuple with complementary bits.
+ *
+ * @param h
+ *  Pointer to the helper struct
+ * @param orig_tuple
+ *  Pointer to the tuple to be adjusted
+ * @param adj_bits
+ *  Value returned by rte_thash_get_compliment()
+ * @param fn
+ *  Callback function to check the adjusted tuple. May be NULL
+ * @param userdata
+ *  Pointer to the userdata to be passed to fn(). May be NULL
+ *
+ * @return
+ *  0 on success
+ *  negative otherwise
+ */
+__rte_experimental
+int
+rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
+	uint8_t *orig_tuple, uint32_t adj_bits,
+	rte_thash_check_tuple_t fn, void *userdata);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
index 93cb230..a992a1e 100644
--- a/lib/librte_hash/version.map
+++ b/lib/librte_hash/version.map
@@ -32,6 +32,7 @@  DPDK_21 {
 EXPERIMENTAL {
 	global:
 
+	rte_thash_adjust_tuple;
 	rte_hash_free_key_with_position;
 	rte_hash_lookup_with_hash_bulk;
 	rte_hash_lookup_with_hash_bulk_data;
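
For reference, a minimal usage sketch of the API added by this series
(illustrative only, not part of the patch): it assumes a 40-byte key,
reta_sz = 7 (a 128-entry RETA), and an IPv4/L4 tuple whose 16-bit source
port may be rewritten; desired_value is the RETA index the flow should
land on. A real application would keep the ctx for the lifetime of the
port instead of creating it per call.

#include <limits.h>
#include <stddef.h>

#include <rte_errno.h>
#include <rte_thash.h>

static int
steer_to_reta_entry(union rte_thash_tuple *tuple, uint32_t desired_value)
{
	struct rte_thash_ctx *ctx;
	struct rte_thash_subtuple_helper *h;
	uint32_t hash, adj;
	int ret;

	/* 40-byte random key, 7 LSBs of the hash index the RETA */
	ctx = rte_thash_init_ctx("example", 40, 7, NULL, 0);
	if (ctx == NULL)
		return -rte_errno;

	/* helper over the 16-bit sport field inside the IPv4/L4 tuple */
	ret = rte_thash_add_helper(ctx, "sport", 16,
		offsetof(union rte_thash_tuple, v4.sport) * CHAR_BIT);
	if (ret != 0) {
		rte_thash_free_ctx(ctx);
		return ret;
	}
	h = rte_thash_get_helper(ctx, "sport");

	/* tuple must be in host byte order, as rte_softrss() expects */
	hash = rte_softrss((uint32_t *)tuple, RTE_THASH_V4_L4_LEN,
		rte_thash_get_key(ctx));
	adj = rte_thash_get_compliment(h, hash, desired_value);

	/* flip complementary bits in sport: hash LSBs become desired_value */
	return rte_thash_adjust_tuple(h, (uint8_t *)tuple, adj, NULL, NULL);
}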