net/mlx5: introduce mlx5 hash list

Message ID 1572967680-93338-1-git-send-email-bingz@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series net/mlx5: introduce mlx5 hash list |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-compilation success Compile Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Bing Zhao Nov. 5, 2019, 3:28 p.m. UTC
  Introduce simple hash list to the mlx5 utilities. User can define
its own data structure containing the mlx5_hlist_entry and create
the hash list table via the creation interface. Then the entry will
be inserted into the table and linked to the corresponding list
head. User should guarantee there is no collision of the key and
provide a callback function to handle all the remaining entries in
the table when destroying the hash list. User should define a proper
number of the list heads in the table in order to get a better
performance. The LSB of the 'key' is used to calculate the index of
the head in the list heads array.
This implementation is not multi-threads safe right now.

Signed-off-by: Bing Zhao <bingz@mellanox.com>
---
 drivers/net/mlx5/Makefile     |   1 +
 drivers/net/mlx5/meson.build  |   1 +
 drivers/net/mlx5/mlx5_utils.c | 126 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_utils.h | 106 ++++++++++++++++++++++++++++++++++-
 4 files changed, 232 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_utils.c
  

Comments

Slava Ovsiienko Nov. 5, 2019, 3:29 p.m. UTC | #1
> -----Original Message-----
> From: Bing Zhao <bingz@mellanox.com>
> Sent: Tuesday, November 5, 2019 17:28
> To: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; dev@dpdk.org
> Subject: [PATCH] net/mlx5: introduce mlx5 hash list
> 
> Introduce simple hash list to the mlx5 utilities. User can define its own data
> structure containing the mlx5_hlist_entry and create the hash list table via
> the creation interface. Then the entry will be inserted into the table and
> linked to the corresponding list head. User should guarantee there is no
> collision of the key and provide a callback function to handle all the
> remaining entries in the table when destroying the hash list. User should
> define a proper number of the list heads in the table in order to get a better
> performance. The LSB of the 'key' is used to calculate the index of the head
> in the list heads array.
> This implementation is not multi-threads safe right now.
> 
> Signed-off-by: Bing Zhao <bingz@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

> ---
>  drivers/net/mlx5/Makefile     |   1 +
>  drivers/net/mlx5/meson.build  |   1 +
>  drivers/net/mlx5/mlx5_utils.c | 126
> ++++++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_utils.h | 106
> ++++++++++++++++++++++++++++++++++-
>  4 files changed, 232 insertions(+), 2 deletions(-)  create mode 100644
> drivers/net/mlx5/mlx5_utils.c
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> dae5b9f..44cc26a 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -37,6 +37,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) +=
> mlx5_flow_verbs.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mp.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_utils.c
> 
>  ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
>  INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) diff --git
> a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> 02494cf..344f109 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -58,6 +58,7 @@ if build
>  		'mlx5_txq.c',
>  		'mlx5_vlan.c',
>  		'mlx5_devx_cmds.c',
> +		'mlx5_utils.c',
>  	)
>  	if (dpdk_conf.has('RTE_ARCH_X86_64')
>  		or dpdk_conf.has('RTE_ARCH_ARM64')
> diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
> new file mode 100644 index 0000000..0181f49
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_utils.c
> @@ -0,0 +1,126 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2019 Mellanox Technologies, Ltd  */
> +
> +#include <rte_malloc.h>
> +
> +#include "mlx5_utils.h"
> +
> +struct mlx5_hlist *
> +mlx5_hlist_create(char *name, uint64_t size,
> +mlx5_hlist_destroy_callback_fn cb) {
> +	struct mlx5_hlist *h;
> +	uint64_t act_size;
> +	uint64_t alloc_size;
> +
> +	if (!size)
> +		return NULL;
> +	/* If the low 32B is power of 2, then the 64B integer is too. */
> +	if (!rte_is_power_of_2((uint32_t)size)) {
> +		act_size = rte_align64pow2(size);
> +		DRV_LOG(WARNING, "Size 0x%" PRIX64 " is not power of 2,
> will "
> +			"be aligned to 0x%" PRIX64 ".\n", size, act_size);
> +	} else {
> +		act_size = size;
> +	}
> +	alloc_size = sizeof(struct mlx5_hlist) +
> +		     sizeof(struct mlx5_hlist_head) * act_size;
> +	/* Using zmalloc, then no need to initialize the heads. */
> +	h = rte_zmalloc(name, alloc_size, RTE_CACHE_LINE_SIZE);
> +	if (!h) {
> +		DRV_LOG(ERR, "No memory for hash list %s creation\n",
> +			name ? name : "None");
> +		return NULL;
> +	}
> +	if (name)
> +		snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
> +	if (!cb)
> +		DRV_LOG(WARNING, "No callback function is specified, will
> use "
> +			"rte_free when destroying hlist %p %s\n",
> +			(void *)h, h->name);
> +	else
> +		h->cb = cb;
> +	h->table_sz = act_size;
> +	h->mask = act_size - 1;
> +	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX64 ", callback "
> +		"0x%" PRIX64 "is created.\n",
> +		h->name, act_size, (uint64_t)(uintptr_t)cb);
> +	return h;
> +}
> +
> +struct mlx5_hlist_entry *
> +mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key) {
> +	uint64_t idx;
> +	struct mlx5_hlist_head *first;
> +	struct mlx5_hlist_entry *node;
> +
> +	assert(h);
> +	idx = key & h->mask;
> +	first = &h->heads[idx];
> +	LIST_FOREACH(node, first, next) {
> +		if (node->key == key)
> +			return node;
> +	}
> +	return NULL;
> +}
> +
> +int
> +mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
> +{
> +	uint64_t idx;
> +	struct mlx5_hlist_head *first;
> +	struct mlx5_hlist_entry *node;
> +
> +	assert(h && entry);
> +	idx = entry->key & h->mask;
> +	first = &h->heads[idx];
> +	/* No need to reuse the lookup function. */
> +	LIST_FOREACH(node, first, next) {
> +		if (node->key == entry->key)
> +			return -EEXIST;
> +	}
> +	LIST_INSERT_HEAD(first, entry, next);
> +	return 0;
> +}
> +
> +void
> +mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
> +		  struct mlx5_hlist_entry *entry)
> +{
> +	assert(entry && entry->next.le_prev);
> +	LIST_REMOVE(entry, next);
> +	/* Set to NULL to get rid of removing action for more than once. */
> +	entry->next.le_prev = NULL;
> +}
> +
> +void
> +mlx5_hlist_destroy(struct mlx5_hlist *h) {
> +	uint64_t idx;
> +	struct mlx5_hlist_entry *entry;
> +	mlx5_hlist_destroy_callback_fn cb;
> +
> +	assert(h);
> +	cb = h->cb;
> +	for (idx = 0; idx < h->table_sz; ++idx) {
> +		/* no LIST_FOREACH_SAFE, using while instead */
> +		while (!LIST_EMPTY(&h->heads[idx])) {
> +			entry = LIST_FIRST(&h->heads[idx]);
> +			LIST_REMOVE(entry, next);
> +			/*
> +			 * The owner of whole element which contains data
> entry
> +			 * is the user, so it's the user's duty to do the clean
> +			 * up and the free work because someone may not
> put the
> +			 * hlist entry at the beginning(suggested to locate at
> +			 * the beginning). Or else the default free function
> +			 * will be used.
> +			 */
> +			if (cb)
> +				h->cb(entry);
> +			else
> +				rte_free(entry);
> +		}
> +	}
> +	rte_free(h);
> +}
> diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
> index 97092c7..81da50d 100644
> --- a/drivers/net/mlx5/mlx5_utils.h
> +++ b/drivers/net/mlx5/mlx5_utils.h
> @@ -151,13 +151,13 @@
>  	snprintf(name, sizeof(name), __VA_ARGS__)
> 
>  /**
> - * Return nearest power of two above input value.
> + * Return logarithm of the nearest power of two above input value.
>   *
>   * @param v
>   *   Input value.
>   *
>   * @return
> - *   Nearest power of two above input value.
> + *   Logarithm of the nearest power of two above input value.
>   */
>  static inline unsigned int
>  log2above(unsigned int v)
> @@ -170,4 +170,106 @@
>  	return l + r;
>  }
> 
> +/** Maximum size of string for naming the hlist table. */
> +#define MLX5_HLIST_NAMESIZE			32
> +
> +/**
> + * Structure of the entry in the hash list, user should define its own
> +struct
> + * that contains this in order to store the data. The 'key' is 64-bits
> +right
> + * now and its user's responsibility to guarantee there is no collision.
> + */
> +struct mlx5_hlist_entry {
> +	LIST_ENTRY(mlx5_hlist_entry) next; /* entry pointers in the list. */
> +	uint64_t key; /* user defined 'key', could be the hash signature. */
> +};
> +
> +/** Structure for hash head. */
> +LIST_HEAD(mlx5_hlist_head, mlx5_hlist_entry);
> +
> +/** Type of function that is used to handle the data before freeing. */
> +typedef void (*mlx5_hlist_destroy_callback_fn)(void *p);
> +
> +/** hash list table structure */
> +struct mlx5_hlist {
> +	char name[MLX5_HLIST_NAMESIZE]; /**< Name of the hash list. */
> +	/**< number of heads, need to be power of 2. */
> +	uint64_t table_sz;
> +	/**< mask to get the index of the list heads. */
> +	uint64_t mask;
> +	/**< The callback function before free when destroying hash list. */
> +	mlx5_hlist_destroy_callback_fn cb;
> +	struct mlx5_hlist_head heads[];	/**< list head arrays. */
> +};
> +
> +/**
> + * Create a hash list table, the user can specify the list heads array
> +size
> + * of the table, now the size should be a power of 2 in order to get
> +better
> + * distribution for the entries. Each entry is a part of the whole data
> +element
> + * and the caller should be responsible for the data element's
> +allocation and
> + * cleanup / free.
> + *
> + * @param name
> + *   Name of the hash list(optional).
> + * @param size
> + *   Heads array size of the hash list.
> + * @param cb
> + *   Callback function for each inserted entries when destroying the hash list.
> + *
> + * @return
> + *   Pointer of the hash list table created, NULL on failure.
> + */
> +struct mlx5_hlist *mlx5_hlist_create(char *name, uint64_t size,
> +				     mlx5_hlist_destroy_callback_fn cb);
> +
> +/**
> + * Search an entry matching the key.
> + *
> + * @param h
> + *   Pointer to the hast list table.
> + * @param key
> + *   Key for the searching entry.
> + *
> + * @return
> + *   Pointer of the hlist entry if found, NULL otherwise.
> + */
> +struct mlx5_hlist_entry *mlx5_hlist_lookup(struct mlx5_hlist *h,
> +uint64_t key);
> +
> +/**
> + * Insert an entry to the hash list table, the entry is only part of
> +whole data
> + * element and a 64B key is used for matching. User should construct
> +the key or
> + * give a calculated hash signature and guarantee there is no collision.
> + *
> + * @param h
> + *   Pointer to the hast list table.
> + * @param entry
> + *   Entry to be inserted into the hash list table.
> + *
> + * @return
> + *   - zero for success.
> + *   - -EEXIST if the entry is already inserted.
> + */
> +int mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry
> +*entry);
> +
> +/**
> + * Remove an entry from the hash list table. User should guarantee the
> +validity
> + * of the entry.
> + *
> + * @param h
> + *   Pointer to the hast list table. (not used)
> + * @param entry
> + *   Entry to be removed from the hash list table.
> + */
> +void mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
> +		       struct mlx5_hlist_entry *entry);
> +
> +/**
> + * Destroy the hash list table, all the entries already inserted into
> +the lists
> + * will be handled by the callback function provided by the user
> +(including
> + * free if needed) before the table is freed.
> + *
> + * @param h
> + *   Pointer to the hast list table.
> + */
> +void mlx5_hlist_destroy(struct mlx5_hlist *h);
> +
>  #endif /* RTE_PMD_MLX5_UTILS_H_ */
> --
> 1.8.3.1
  
Raslan Darawsheh Nov. 5, 2019, 4:02 p.m. UTC | #2
Hi,

> -----Original Message-----
> From: Bing Zhao <bingz@mellanox.com>
> Sent: Tuesday, November 5, 2019 5:28 PM
> To: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>;
> dev@dpdk.org
> Subject: [PATCH] net/mlx5: introduce mlx5 hash list
> 
> Introduce simple hash list to the mlx5 utilities. User can define its own data
> structure containing the mlx5_hlist_entry and create the hash list table via
> the creation interface. Then the entry will be inserted into the table and
> linked to the corresponding list head. User should guarantee there is no
> collision of the key and provide a callback function to handle all the remaining
> entries in the table when destroying the hash list. User should define a
> proper number of the list heads in the table in order to get a better
> performance. The LSB of the 'key' is used to calculate the index of the head
> in the list heads array.
> This implementation is not multi-threads safe right now.
> 
> Signed-off-by: Bing Zhao <bingz@mellanox.com>
> ---

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  
Stephen Hemminger Nov. 5, 2019, 5:20 p.m. UTC | #3
On Tue,  5 Nov 2019 17:28:00 +0200
Bing Zhao <bingz@mellanox.com> wrote:

> Introduce simple hash list to the mlx5 utilities. User can define
> its own data structure containing the mlx5_hlist_entry and create
> the hash list table via the creation interface. Then the entry will
> be inserted into the table and linked to the corresponding list
> head. User should guarantee there is no collision of the key and
> provide a callback function to handle all the remaining entries in
> the table when destroying the hash list. User should define a proper
> number of the list heads in the table in order to get a better
> performance. The LSB of the 'key' is used to calculate the index of
> the head in the list heads array.
> This implementation is not multi-threads safe right now.
> 
> Signed-off-by: Bing Zhao <bingz@mellanox.com>

There is already a plethora of hash routines in DPDK, why introduce
a chained hash list like Linux kernel?
  
Stephen Hemminger Nov. 5, 2019, 5:25 p.m. UTC | #4
On Tue,  5 Nov 2019 17:28:00 +0200
Bing Zhao <bingz@mellanox.com> wrote:

> +struct mlx5_hlist *
> +mlx5_hlist_create(char *name, uint64_t size, mlx5_hlist_destroy_callback_fn cb)

Name is unused in rte_malloc, so why bother with it here?
> +{
> +	struct mlx5_hlist *h;
> +	uint64_t act_size;
> +	uint64_t alloc_size;
> +
> +	if (!size)
> +		return NULL;
> +	/* If the low 32B is power of 2, then the 64B integer is too. */
> +	if (!rte_is_power_of_2((uint32_t)size)) {

If you are doing cast, therefore you are truncating, therefore size
can't really be a uint64_t and should be size_t or uint32_t.

> +		act_size = rte_align64pow2(size);
> +		DRV_LOG(WARNING, "Size 0x%" PRIX64 " is not power of 2, will "
> +			"be aligned to 0x%" PRIX64 ".\n", size, act_size);
> +	} else {
> +		act_size = size;
> +	}

Power of 2 hash list size wastes significant amount of memory.

> +	alloc_size = sizeof(struct mlx5_hlist) +
> +		     sizeof(struct mlx5_hlist_head) * act_size;
> +	/* Using zmalloc, then no need to initialize the heads. */
> +	h = rte_zmalloc(name, alloc_size, RTE_CACHE_LINE_SIZE);
Why not use rte_calloc?

Why use rte_malloc at all? Does this need to be accessed from hardware
or shared between primary/secondary.  Also rte_malloc has less tooling
support (coverity, leak checking, etc) than standard malloc.


> +	if (!h) {
> +		DRV_LOG(ERR, "No memory for hash list %s creation\n",
> +			name ? name : "None");
> +		return NULL;
> +	}
> +	if (name)
> +		snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
> +	if (!cb)
> +		DRV_LOG(WARNING, "No callback function is specified, will use "
> +			"rte_free when destroying hlist %p %s\n",
> +			(void *)h, h->name);
> +	else
> +		h->cb = cb;
> +	h->table_sz = act_size;
> +	h->mask = act_size - 1;
> +	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX64 ", callback "
> +		"0x%" PRIX64 "is created.\n",
> +		h->name, act_size, (uint64_t)(uintptr_t)cb);
> +	return h;
> +}
  
Stephen Hemminger Nov. 5, 2019, 5:26 p.m. UTC | #5
On Tue, 5 Nov 2019 16:02:18 +0000
Raslan Darawsheh <rasland@mellanox.com> wrote:

> Hi,
> 
> > -----Original Message-----
> > From: Bing Zhao <bingz@mellanox.com>
> > Sent: Tuesday, November 5, 2019 5:28 PM
> > To: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> > <viacheslavo@mellanox.com>; Raslan Darawsheh <rasland@mellanox.com>;
> > dev@dpdk.org
> > Subject: [PATCH] net/mlx5: introduce mlx5 hash list
> > 
> > Introduce simple hash list to the mlx5 utilities. User can define its own data
> > structure containing the mlx5_hlist_entry and create the hash list table via
> > the creation interface. Then the entry will be inserted into the table and
> > linked to the corresponding list head. User should guarantee there is no
> > collision of the key and provide a callback function to handle all the remaining
> > entries in the table when destroying the hash list. User should define a
> > proper number of the list heads in the table in order to get a better
> > performance. The LSB of the 'key' is used to calculate the index of the head
> > in the list heads array.
> > This implementation is not multi-threads safe right now.
> > 
> > Signed-off-by: Bing Zhao <bingz@mellanox.com>
> > ---  
> 
> Patch applied to next-net-mlx,

Please don't apply so fast! Not allowing a few days for review means
that it is just getting rubber stamped and not allowing for any feedback.
  
Slava Ovsiienko Nov. 5, 2019, 6:49 p.m. UTC | #6
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, November 5, 2019 19:21
> To: Bing Zhao <bingz@mellanox.com>
> Cc: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/mlx5: introduce mlx5 hash list
> 
> On Tue,  5 Nov 2019 17:28:00 +0200
> Bing Zhao <bingz@mellanox.com> wrote:
> 
> > Introduce simple hash list to the mlx5 utilities. User can define its
> > own data structure containing the mlx5_hlist_entry and create the hash
> > list table via the creation interface. Then the entry will be inserted
> > into the table and linked to the corresponding list head. User should
> > guarantee there is no collision of the key and provide a callback
> > function to handle all the remaining entries in the table when
> > destroying the hash list. User should define a proper number of the
> > list heads in the table in order to get a better performance. The LSB
> > of the 'key' is used to calculate the index of the head in the list
> > heads array.
> > This implementation is not multi-threads safe right now.
> >
> > Signed-off-by: Bing Zhao <bingz@mellanox.com>
> 
> There is already a plethora of hash routines in DPDK, why introduce a
> chained hash list like Linux kernel?

We have multiple features in mlx5 requiring the run-time extendable storage.
For example, application/user might not know how many Flows are going to come
into play, it is not nice to allocate gigabytes at startup, assuming some fixed upper limit (10M+).
Simple lists (used before) are too slow, update the DPDK Cuckoo to be scaled is not trivial
and requires some time, so - simple hashed list seems  to be acceptable palliative for now.

With best regards, Slava
  
Bing Zhao Nov. 6, 2019, 7:45 a.m. UTC | #7
Hi Stephen,
Many thanks for your reviewing and comments. PSB.

BR. Bing

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Wednesday, November 6, 2019 1:25 AM
> To: Bing Zhao <bingz@mellanox.com>
> Cc: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> <viacheslavo@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/mlx5: introduce mlx5 hash list
> 
> On Tue,  5 Nov 2019 17:28:00 +0200
> Bing Zhao <bingz@mellanox.com> wrote:
> 
> > +struct mlx5_hlist *
> > +mlx5_hlist_create(char *name, uint64_t size,
> > +mlx5_hlist_destroy_callback_fn cb)
> 
> Name is unused in rte_malloc, so why bother with it here?

[Bing]: The name is also saved in the hash list, though it is never used
either in the following stage. But saving such string will give some help when
facing some memory corruption issue. And in practice, such const string in
rte_*alloc_* functions will help to record more allocation information if
there is a wrap for debugging.

> > +{
> > +	struct mlx5_hlist *h;
> > +	uint64_t act_size;
> > +	uint64_t alloc_size;
> > +
> > +	if (!size)
> > +		return NULL;
> > +	/* If the low 32B is power of 2, then the 64B integer is too. */
> > +	if (!rte_is_power_of_2((uint32_t)size)) {
> 
> If you are doing cast, therefore you are truncating, therefore size can't
> really be a uint64_t and should be size_t or uint32_t.


[Bing]: Nice catch, thanks. Yes, if is truncated, the validation might be
incorrect, 0x11 1000 0000 is not a power of 2 even if 0x1000 0000 is.
And indeed, there is no such big array of head is needed in practice. U32
should be enough. I will correct it.

> 
> > +		act_size = rte_align64pow2(size);
> > +		DRV_LOG(WARNING, "Size 0x%" PRIX64 " is not power
> of 2, will "
> > +			"be aligned to 0x%" PRIX64 ".\n", size, act_size);
> > +	} else {
> > +		act_size = size;
> > +	}
> 
> Power of 2 hash list size wastes significant amount of memory.

[Bing]: In Koh's implementation, '%' is used. In general, '&' and a little
faster than "%". If '%' is used, then there is no such restriction of the head
array size. And to my understanding, if the hash signature is calculated
perfectly, then the power of 2 hash list size will make all the entries
distributed fairly among the lists.  Correct me if anything wrong.

> 
> > +	alloc_size = sizeof(struct mlx5_hlist) +
> > +		     sizeof(struct mlx5_hlist_head) * act_size;
> > +	/* Using zmalloc, then no need to initialize the heads. */
> > +	h = rte_zmalloc(name, alloc_size, RTE_CACHE_LINE_SIZE);
> Why not use rte_calloc?
> 
> Why use rte_malloc at all? Does this need to be accessed from
> hardware or shared between primary/secondary.  Also rte_malloc has
> less tooling support (coverity, leak checking, etc) than standard malloc.
> 

[Bing]: Yes, there is no need, and for each rte memory allocation, the
overhead is 64B but libc malloc sometimes only occupies 16B. Other stuffs
working together with the lists will be allocated by rte APIs. To my
understanding, it is better to keep them "near" each other inside the memory.
@Slava, what's your opinion?

> 
> > +	if (!h) {
> > +		DRV_LOG(ERR, "No memory for hash list %s
> creation\n",
> > +			name ? name : "None");
> > +		return NULL;
> > +	}
> > +	if (name)
> > +		snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s",
> name);
> > +	if (!cb)
> > +		DRV_LOG(WARNING, "No callback function is specified,
> will use "
> > +			"rte_free when destroying hlist %p %s\n",
> > +			(void *)h, h->name);
> > +	else
> > +		h->cb = cb;
> > +	h->table_sz = act_size;
> > +	h->mask = act_size - 1;
> > +	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX64 ",
> callback "
> > +		"0x%" PRIX64 "is created.\n",
> > +		h->name, act_size, (uint64_t)(uintptr_t)cb);
> > +	return h;
> > +}
  
Raslan Darawsheh Nov. 6, 2019, 8:14 a.m. UTC | #8
Hi Stephen,

You are totally right about it, but, I needed this patch for some testing. So Will remove it from next-net-mlx for now, and will be waiting for some final version to be sent.

Kindest regards,
Raslan Darawsheh

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, November 5, 2019 7:27 PM
> To: Raslan Darawsheh <rasland@mellanox.com>
> Cc: Bing Zhao <bingz@mellanox.com>; Ori Kam <orika@mellanox.com>;
> Slava Ovsiienko <viacheslavo@mellanox.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/mlx5: introduce mlx5 hash list
> 
> On Tue, 5 Nov 2019 16:02:18 +0000
> Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> > Hi,
> >
> > > -----Original Message-----
> > > From: Bing Zhao <bingz@mellanox.com>
> > > Sent: Tuesday, November 5, 2019 5:28 PM
> > > To: Ori Kam <orika@mellanox.com>; Slava Ovsiienko
> > > <viacheslavo@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>;
> > > dev@dpdk.org
> > > Subject: [PATCH] net/mlx5: introduce mlx5 hash list
> > >
> > > Introduce simple hash list to the mlx5 utilities. User can define
> > > its own data structure containing the mlx5_hlist_entry and create
> > > the hash list table via the creation interface. Then the entry will
> > > be inserted into the table and linked to the corresponding list
> > > head. User should guarantee there is no collision of the key and
> > > provide a callback function to handle all the remaining entries in
> > > the table when destroying the hash list. User should define a proper
> > > number of the list heads in the table in order to get a better
> > > performance. The LSB of the 'key' is used to calculate the index of the
> head in the list heads array.
> > > This implementation is not multi-threads safe right now.
> > >
> > > Signed-off-by: Bing Zhao <bingz@mellanox.com>
> > > ---
> >
> > Patch applied to next-net-mlx,
> 
> Please don't apply so fast! Not allowing a few days for review means that it is
> just getting rubber stamped and not allowing for any feedback.
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index dae5b9f..44cc26a 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -37,6 +37,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mp.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_utils.c
 
 ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index 02494cf..344f109 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -58,6 +58,7 @@  if build
 		'mlx5_txq.c',
 		'mlx5_vlan.c',
 		'mlx5_devx_cmds.c',
+		'mlx5_utils.c',
 	)
 	if (dpdk_conf.has('RTE_ARCH_X86_64')
 		or dpdk_conf.has('RTE_ARCH_ARM64')
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
new file mode 100644
index 0000000..0181f49
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -0,0 +1,126 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2019 Mellanox Technologies, Ltd
+ */
+
+#include <rte_malloc.h>
+
+#include "mlx5_utils.h"
+
+struct mlx5_hlist *
+mlx5_hlist_create(char *name, uint64_t size, mlx5_hlist_destroy_callback_fn cb)
+{
+	struct mlx5_hlist *h;
+	uint64_t act_size;
+	uint64_t alloc_size;
+
+	if (!size)
+		return NULL;
+	/* If the low 32B is power of 2, then the 64B integer is too. */
+	if (!rte_is_power_of_2((uint32_t)size)) {
+		act_size = rte_align64pow2(size);
+		DRV_LOG(WARNING, "Size 0x%" PRIX64 " is not power of 2, will "
+			"be aligned to 0x%" PRIX64 ".\n", size, act_size);
+	} else {
+		act_size = size;
+	}
+	alloc_size = sizeof(struct mlx5_hlist) +
+		     sizeof(struct mlx5_hlist_head) * act_size;
+	/* Using zmalloc, then no need to initialize the heads. */
+	h = rte_zmalloc(name, alloc_size, RTE_CACHE_LINE_SIZE);
+	if (!h) {
+		DRV_LOG(ERR, "No memory for hash list %s creation\n",
+			name ? name : "None");
+		return NULL;
+	}
+	if (name)
+		snprintf(h->name, MLX5_HLIST_NAMESIZE, "%s", name);
+	if (!cb)
+		DRV_LOG(WARNING, "No callback function is specified, will use "
+			"rte_free when destroying hlist %p %s\n",
+			(void *)h, h->name);
+	else
+		h->cb = cb;
+	h->table_sz = act_size;
+	h->mask = act_size - 1;
+	DRV_LOG(DEBUG, "Hash list with %s size 0x%" PRIX64 ", callback "
+		"0x%" PRIX64 "is created.\n",
+		h->name, act_size, (uint64_t)(uintptr_t)cb);
+	return h;
+}
+
+struct mlx5_hlist_entry *
+mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key)
+{
+	uint64_t idx;
+	struct mlx5_hlist_head *first;
+	struct mlx5_hlist_entry *node;
+
+	assert(h);
+	idx = key & h->mask;
+	first = &h->heads[idx];
+	LIST_FOREACH(node, first, next) {
+		if (node->key == key)
+			return node;
+	}
+	return NULL;
+}
+
+int
+mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry)
+{
+	uint64_t idx;
+	struct mlx5_hlist_head *first;
+	struct mlx5_hlist_entry *node;
+
+	assert(h && entry);
+	idx = entry->key & h->mask;
+	first = &h->heads[idx];
+	/* No need to reuse the lookup function. */
+	LIST_FOREACH(node, first, next) {
+		if (node->key == entry->key)
+			return -EEXIST;
+	}
+	LIST_INSERT_HEAD(first, entry, next);
+	return 0;
+}
+
+void
+mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
+		  struct mlx5_hlist_entry *entry)
+{
+	assert(entry && entry->next.le_prev);
+	LIST_REMOVE(entry, next);
+	/* Set to NULL to get rid of removing action for more than once. */
+	entry->next.le_prev = NULL;
+}
+
+void
+mlx5_hlist_destroy(struct mlx5_hlist *h)
+{
+	uint64_t idx;
+	struct mlx5_hlist_entry *entry;
+	mlx5_hlist_destroy_callback_fn cb;
+
+	assert(h);
+	cb = h->cb;
+	for (idx = 0; idx < h->table_sz; ++idx) {
+		/* no LIST_FOREACH_SAFE, using while instead */
+		while (!LIST_EMPTY(&h->heads[idx])) {
+			entry = LIST_FIRST(&h->heads[idx]);
+			LIST_REMOVE(entry, next);
+			/*
+			 * The owner of whole element which contains data entry
+			 * is the user, so it's the user's duty to do the clean
+			 * up and the free work because someone may not put the
+			 * hlist entry at the beginning(suggested to locate at
+			 * the beginning). Or else the default free function
+			 * will be used.
+			 */
+			if (cb)
+				h->cb(entry);
+			else
+				rte_free(entry);
+		}
+	}
+	rte_free(h);
+}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 97092c7..81da50d 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -151,13 +151,13 @@ 
 	snprintf(name, sizeof(name), __VA_ARGS__)
 
 /**
- * Return nearest power of two above input value.
+ * Return logarithm of the nearest power of two above input value.
  *
  * @param v
  *   Input value.
  *
  * @return
- *   Nearest power of two above input value.
+ *   Logarithm of the nearest power of two above input value.
  */
 static inline unsigned int
 log2above(unsigned int v)
@@ -170,4 +170,106 @@ 
 	return l + r;
 }
 
+/** Maximum size of string for naming the hlist table. */
+#define MLX5_HLIST_NAMESIZE			32
+
+/**
+ * Structure of the entry in the hash list, user should define its own struct
+ * that contains this in order to store the data. The 'key' is 64-bits right
+ * now and its user's responsibility to guarantee there is no collision.
+ */
+struct mlx5_hlist_entry {
+	LIST_ENTRY(mlx5_hlist_entry) next; /* entry pointers in the list. */
+	uint64_t key; /* user defined 'key', could be the hash signature. */
+};
+
+/** Structure for hash head. */
+LIST_HEAD(mlx5_hlist_head, mlx5_hlist_entry);
+
+/** Type of function that is used to handle the data before freeing. */
+typedef void (*mlx5_hlist_destroy_callback_fn)(void *p);
+
+/** hash list table structure */
+struct mlx5_hlist {
+	char name[MLX5_HLIST_NAMESIZE]; /**< Name of the hash list. */
+	/**< number of heads, need to be power of 2. */
+	uint64_t table_sz;
+	/**< mask to get the index of the list heads. */
+	uint64_t mask;
+	/**< The callback function before free when destroying hash list. */
+	mlx5_hlist_destroy_callback_fn cb;
+	struct mlx5_hlist_head heads[];	/**< list head arrays. */
+};
+
+/**
+ * Create a hash list table, the user can specify the list heads array size
+ * of the table, now the size should be a power of 2 in order to get better
+ * distribution for the entries. Each entry is a part of the whole data element
+ * and the caller should be responsible for the data element's allocation and
+ * cleanup / free.
+ *
+ * @param name
+ *   Name of the hash list(optional).
+ * @param size
+ *   Heads array size of the hash list.
+ * @param cb
+ *   Callback function for each inserted entries when destroying the hash list.
+ *
+ * @return
+ *   Pointer of the hash list table created, NULL on failure.
+ */
+struct mlx5_hlist *mlx5_hlist_create(char *name, uint64_t size,
+				     mlx5_hlist_destroy_callback_fn cb);
+
+/**
+ * Search an entry matching the key.
+ *
+ * @param h
+ *   Pointer to the hast list table.
+ * @param key
+ *   Key for the searching entry.
+ *
+ * @return
+ *   Pointer of the hlist entry if found, NULL otherwise.
+ */
+struct mlx5_hlist_entry *mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key);
+
+/**
+ * Insert an entry to the hash list table, the entry is only part of whole data
+ * element and a 64B key is used for matching. User should construct the key or
+ * give a calculated hash signature and guarantee there is no collision.
+ *
+ * @param h
+ *   Pointer to the hast list table.
+ * @param entry
+ *   Entry to be inserted into the hash list table.
+ *
+ * @return
+ *   - zero for success.
+ *   - -EEXIST if the entry is already inserted.
+ */
+int mlx5_hlist_insert(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry);
+
+/**
+ * Remove an entry from the hash list table. User should guarantee the validity
+ * of the entry.
+ *
+ * @param h
+ *   Pointer to the hast list table. (not used)
+ * @param entry
+ *   Entry to be removed from the hash list table.
+ */
+void mlx5_hlist_remove(struct mlx5_hlist *h __rte_unused,
+		       struct mlx5_hlist_entry *entry);
+
+/**
+ * Destroy the hash list table, all the entries already inserted into the lists
+ * will be handled by the callback function provided by the user (including
+ * free if needed) before the table is freed.
+ *
+ * @param h
+ *   Pointer to the hast list table.
+ */
+void mlx5_hlist_destroy(struct mlx5_hlist *h);
+
 #endif /* RTE_PMD_MLX5_UTILS_H_ */