[v4,6/9] eal: register non-EAL threads as lcores
diff mbox series

Message ID 20200626144736.11011-7-david.marchand@redhat.com
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series
  • Register non-EAL threads as lcore
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

David Marchand June 26, 2020, 2:47 p.m. UTC
DPDK allows calling some part of its API from a non-EAL thread but this
has some limitations.
OVS (and other applications) has its own thread management but still
want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
faking EAL threads potentially unknown of some DPDK component.

Introduce a new API to register non-EAL thread and associate them to a
free lcore with a new NON_EAL role.
This role denotes lcores that do not run DPDK mainloop and as such
prevents use of rte_eal_wait_lcore() and consorts.

Multiprocess is not supported as the need for cohabitation with this new
feature is unclear at the moment.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
---
Changes since v2:
- refused multiprocess init once rte_thread_register got called, and
  vice versa,
- added warning on multiprocess in rte_thread_register doxygen,

Changes since v1:
- moved cleanup on lcore role code in patch 5,
- added unit test,
- updated documentation,
- changed naming from "external thread" to "registered non-EAL thread"

---
 MAINTAINERS                                   |   1 +
 app/test/Makefile                             |   1 +
 app/test/autotest_data.py                     |   6 +
 app/test/meson.build                          |   2 +
 app/test/test_lcores.c                        | 139 ++++++++++++++++++
 doc/guides/howto/debug_troubleshoot.rst       |   5 +-
 .../prog_guide/env_abstraction_layer.rst      |  22 +--
 doc/guides/prog_guide/mempool_lib.rst         |   2 +-
 lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
 lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
 lib/librte_eal/common/eal_common_thread.c     |  33 +++++
 lib/librte_eal/common/eal_memcfg.h            |  10 ++
 lib/librte_eal/common/eal_private.h           |  18 +++
 lib/librte_eal/freebsd/eal.c                  |   4 +
 lib/librte_eal/include/rte_lcore.h            |  25 +++-
 lib/librte_eal/linux/eal.c                    |   4 +
 lib/librte_eal/rte_eal_version.map            |   2 +
 lib/librte_mempool/rte_mempool.h              |  11 +-
 18 files changed, 349 insertions(+), 22 deletions(-)
 create mode 100644 app/test/test_lcores.c

Comments

Ananyev, Konstantin June 29, 2020, 2:27 p.m. UTC | #1
> diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
> index 86d32a3dd7..a61824a779 100644
> --- a/lib/librte_eal/common/eal_common_lcore.c
> +++ b/lib/librte_eal/common/eal_common_lcore.c
> @@ -6,13 +6,15 @@
>  #include <limits.h>
>  #include <string.h>
> 
> -#include <rte_errno.h>
> -#include <rte_log.h>
> -#include <rte_eal.h>
> -#include <rte_lcore.h>
>  #include <rte_common.h>
>  #include <rte_debug.h>
> +#include <rte_eal.h>
> +#include <rte_errno.h>
> +#include <rte_lcore.h>
> +#include <rte_log.h>
> +#include <rte_spinlock.h>
> 
> +#include "eal_memcfg.h"
>  #include "eal_private.h"
>  #include "eal_thread.h"
> 
> @@ -220,3 +222,43 @@ rte_socket_id_by_idx(unsigned int idx)
>  	}
>  	return config->numa_nodes[idx];
>  }
> +
> +static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
> +
> +unsigned int
> +eal_lcore_non_eal_allocate(void)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +	unsigned int lcore_id;
> +
> +	if (cfg->process_type == RTE_PROC_SECONDARY ||
> +			!eal_mcfg_forbid_multiprocess()) {
> +		RTE_LOG(ERR, EAL, "Multiprocess in use, cannot allocate new lcore.\n");
> +		return RTE_MAX_LCORE;
> +	}
> +	rte_spinlock_lock(&lcore_lock);
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
> +			continue;
> +		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
> +		cfg->lcore_count++;
> +		break;
> +	}
> +	if (lcore_id == RTE_MAX_LCORE)
> +		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
> +	rte_spinlock_unlock(&lcore_lock);
> +	return lcore_id;
> +}
> +
> +void
> +eal_lcore_non_eal_release(unsigned int lcore_id)
> +{
> +	struct rte_config *cfg = rte_eal_get_configuration();
> +
> +	rte_spinlock_lock(&lcore_lock);
> +	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
> +		cfg->lcore_role[lcore_id] = ROLE_OFF;
> +		cfg->lcore_count--;
> +	}
> +	rte_spinlock_unlock(&lcore_lock);
> +}
> diff --git a/lib/librte_eal/common/eal_common_mcfg.c b/lib/librte_eal/common/eal_common_mcfg.c
> index 49d3ed0ce5..5b42d454e2 100644
> --- a/lib/librte_eal/common/eal_common_mcfg.c
> +++ b/lib/librte_eal/common/eal_common_mcfg.c
> @@ -44,6 +44,42 @@ eal_mcfg_check_version(void)
>  	return 0;
>  }
> 
> +enum mp_status {
> +	MP_UNKNOWN,
> +	MP_FORBIDDEN,
> +	MP_ENABLED,
> +};
> +
> +static bool
> +eal_mcfg_set_mp_status(enum mp_status status)
> +{
> +	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> +	uint8_t expected;
> +	uint8_t desired;
> +
> +	expected = MP_UNKNOWN;
> +	desired = status;
> +	if (__atomic_compare_exchange_n(&mcfg->mp_status, &expected, desired,
> +			false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
> +		return true;
> +
> +	return __atomic_load_n(&mcfg->mp_status, __ATOMIC_RELAXED) == desired;
> +}
> +
> +bool
> +eal_mcfg_forbid_multiprocess(void)
> +{
> +	assert(rte_eal_get_configuration()->process_type == RTE_PROC_PRIMARY);
> +	return eal_mcfg_set_mp_status(MP_FORBIDDEN);
> +}
> +
> +bool
> +eal_mcfg_enable_multiprocess(void)
> +{
> +	assert(rte_eal_get_configuration()->process_type == RTE_PROC_SECONDARY);
> +	return eal_mcfg_set_mp_status(MP_ENABLED);
> +}

I still don't think it is a good idea to allow to change primary proc behaviour
(allow/forbid secondary procs to attach) on the fly.
Imagine the situation - there is a primary proc (supposed to run forever)
that does  rte_thread_register/rte_thread_unregister during its lifetime.
Plus from time to time user runs some secondary process to collect stats/debug
the primary one (proc-info or so).
Now behaviour of such system will be completely non-deterministic:
In some runs primary proc will do rte_thread_register() first, and then secondary
proc will be never able to attach.
In other cases - secondary will win the race, and then for primary 
eal_lcore_non_eal_allocate() will always fail.
Which means different behaviour, significantly varying performance, etc.

I am not big fun to introduce such workaround at all, but at least startup flag,
will guarantee consistent behaviour: secondary proc will always fail to attach
and  eal_lcore_non_eal_allocate() will always succeed
(as long as there are free lcore_ids off-course).

From your previous mail:
> A EAL flag is a stable API from the start, as there is nothing
> describing how we can remove one.
> So a new EAL flag for an experimental API/feature seems contradictory.

Hm, yes there is a gap, but why eal flag can't also be an experimental one?
What will be the difference between flag and API call here?
We can still reserve the right to remove/change it at any time.

As another thought about startup parameters -
would it make sense to have new one: --lcore-allow-list=...?
That would limit lcore_ids available for the process.
Without this new parameter specified -
lcore_allowed_list would be equal to startup lcore list (static ones),
and no dynamic lcore allocations will be allowed.
As an example:
dpdk_app --lcores=6,7 --lcore-allow=0-100
will reserve lcore_ids 6,7 at startup (same as we do now),
and leave [0-5] and [8-100] available for dynamic usage.
 
>  void
>  eal_mcfg_update_internal(void)
>  {
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a7ae0691bf..1cbddc4b5b 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +	lcore_id = eal_lcore_non_eal_allocate();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +	rte_thread_init(lcore_id, &cpuset);
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> +			lcore_id);
> +}
> +
> +void
> +rte_thread_unregister(void)
> +{
> +	unsigned int lcore_id = rte_lcore_id();
> +
> +	if (lcore_id != LCORE_ID_ANY)
> +		eal_lcore_non_eal_release(lcore_id);
> +	rte_thread_uninit();
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
> +			lcore_id);
> +}
> diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
> index 583fcb5953..340e523c6a 100644
> --- a/lib/librte_eal/common/eal_memcfg.h
> +++ b/lib/librte_eal/common/eal_memcfg.h
> @@ -41,6 +41,8 @@ struct rte_mem_config {
>  	rte_rwlock_t memory_hotplug_lock;
>  	/**< Indicates whether memory hotplug request is in progress. */
> 
> +	uint8_t mp_status; /**< Indicates whether multiprocess can be used. */
> +
>  	/* memory segments and zones */
>  	struct rte_fbarray memzones; /**< Memzone descriptors. */
> 
> @@ -91,6 +93,14 @@ eal_mcfg_wait_complete(void);
>  int
>  eal_mcfg_check_version(void);
> 
> +/* mark primary process as not supporting multi-process. */
> +bool
> +eal_mcfg_forbid_multiprocess(void);
> +
> +/* instruct primary process that a secondary process attached once. */
> +bool
> +eal_mcfg_enable_multiprocess(void);
> +
>  /* set mem config as complete */
>  void
>  eal_mcfg_complete(void);
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 0592fcd694..73238ff157 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -396,6 +396,24 @@ uint64_t get_tsc_freq(void);
>   */
>  uint64_t get_tsc_freq_arch(void);
> 
> +/**
> + * Allocate a free lcore to associate to a non-EAL thread.
> + *
> + * @return
> + *   - the id of a lcore with role ROLE_NON_EAL on success.
> + *   - RTE_MAX_LCORE if none was available.
> + */
> +unsigned int eal_lcore_non_eal_allocate(void);
> +
> +/**
> + * Release the lcore used by a non-EAL thread.
> + * Counterpart of eal_lcore_non_eal_allocate().
> + *
> + * @param lcore_id
> + *   The lcore with role ROLE_NON_EAL to release.
> + */
> +void eal_lcore_non_eal_release(unsigned int lcore_id);
> +
>  /**
>   * Prepare physical memory mapping
>   * i.e. hugepages on Linux and
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> index 13e5de006f..32a3d999b8 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -424,6 +424,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 3968c40693..43747e88df 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
>  	ROLE_RTE,
>  	ROLE_OFF,
>  	ROLE_SERVICE,
> +	ROLE_NON_EAL,
>  };
> 
>  /**
> @@ -67,7 +68,8 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
>   *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
>   *
>   * @return
> - *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
> + *  Logical core ID (in EAL thread or registered non-EAL thread) or
> + *  LCORE_ID_ANY (in unregistered non-EAL thread)
>   */
>  static inline unsigned
>  rte_lcore_id(void)
> @@ -279,6 +281,27 @@ int rte_thread_setname(pthread_t id, const char *name);
>  __rte_experimental
>  int rte_thread_getname(pthread_t id, char *name, size_t len);
> 
> +/**
> + * Register current non-EAL thread as a lcore.
> + *
> + * @note This API is not compatible with the multi-process feature:
> + * - if a primary process registers a non-EAL thread, then no secondary process
> + *   will initialise.
> + * - if a secondary process initialises successfully, trying to register a
> + *   non-EAL thread from either primary or secondary processes will always end
> + *   up with the thread getting LCORE_ID_ANY as lcore.
> + */
> +__rte_experimental
> +void
> +rte_thread_register(void);
> +
> +/**
> + * Unregister current thread and release lcore if one was associated.
> + */
> +__rte_experimental
> +void
> +rte_thread_unregister(void);
> +
>  /**
>   * Create a control thread.
>   *
> diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
> index 8894cea50a..1d90d1c0e3 100644
> --- a/lib/librte_eal/linux/eal.c
> +++ b/lib/librte_eal/linux/eal.c
> @@ -514,6 +514,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
> index 5831eea4b0..39c41d445d 100644
> --- a/lib/librte_eal/rte_eal_version.map
> +++ b/lib/librte_eal/rte_eal_version.map
> @@ -396,6 +396,8 @@ EXPERIMENTAL {
> 
>  	# added in 20.08
>  	__rte_trace_mem_per_thread_free;
> +	rte_thread_register;
> +	rte_thread_unregister;
>  };
> 
>  INTERNAL {
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 652d19f9f1..9e0ee052b3 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -28,9 +28,9 @@
>   * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
>   * thread due to the internal per-lcore cache. Due to the lack of caching,
>   * rte_mempool_get() or rte_mempool_put() performance will suffer when called
> - * by non-EAL threads. Instead, non-EAL threads should call
> - * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
> - * created with rte_mempool_cache_create().
> + * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
> + * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
> + * user cache created with rte_mempool_cache_create().
>   */
> 
>  #include <stdio.h>
> @@ -1233,7 +1233,7 @@ void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
>  /**
>   * Create a user-owned mempool cache.
>   *
> - * This can be used by non-EAL threads to enable caching when they
> + * This can be used by unregistered non-EAL threads to enable caching when they
>   * interact with a mempool.
>   *
>   * @param size
> @@ -1264,7 +1264,8 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
>   * @param lcore_id
>   *   The logical core id.
>   * @return
> - *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
> + *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
> + *   thread.
>   */
>  static __rte_always_inline struct rte_mempool_cache *
>  rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
> --
> 2.23.0
Olivier Matz June 30, 2020, 10:07 a.m. UTC | #2
On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> DPDK allows calling some part of its API from a non-EAL thread but this
> has some limitations.
> OVS (and other applications) has its own thread management but still
> want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> faking EAL threads potentially unknown of some DPDK component.
> 
> Introduce a new API to register non-EAL thread and associate them to a
> free lcore with a new NON_EAL role.
> This role denotes lcores that do not run DPDK mainloop and as such
> prevents use of rte_eal_wait_lcore() and consorts.
> 
> Multiprocess is not supported as the need for cohabitation with this new
> feature is unclear at the moment.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> ---
> Changes since v2:
> - refused multiprocess init once rte_thread_register got called, and
>   vice versa,
> - added warning on multiprocess in rte_thread_register doxygen,
> 
> Changes since v1:
> - moved cleanup on lcore role code in patch 5,
> - added unit test,
> - updated documentation,
> - changed naming from "external thread" to "registered non-EAL thread"
> 
> ---
>  MAINTAINERS                                   |   1 +
>  app/test/Makefile                             |   1 +
>  app/test/autotest_data.py                     |   6 +
>  app/test/meson.build                          |   2 +
>  app/test/test_lcores.c                        | 139 ++++++++++++++++++
>  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
>  .../prog_guide/env_abstraction_layer.rst      |  22 +--
>  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
>  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
>  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
>  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
>  lib/librte_eal/common/eal_memcfg.h            |  10 ++
>  lib/librte_eal/common/eal_private.h           |  18 +++
>  lib/librte_eal/freebsd/eal.c                  |   4 +
>  lib/librte_eal/include/rte_lcore.h            |  25 +++-
>  lib/librte_eal/linux/eal.c                    |   4 +
>  lib/librte_eal/rte_eal_version.map            |   2 +
>  lib/librte_mempool/rte_mempool.h              |  11 +-
>  18 files changed, 349 insertions(+), 22 deletions(-)
>  create mode 100644 app/test/test_lcores.c
> 

[...]

> diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> new file mode 100644
> index 0000000000..864bcbade7
> --- /dev/null
> +++ b/app/test/test_lcores.c
> @@ -0,0 +1,139 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2020 Red Hat, Inc.
> + */
> +
> +#include <pthread.h>
> +#include <string.h>
> +
> +#include <rte_lcore.h>
> +
> +#include "test.h"
> +
> +struct thread_context {
> +	enum { INIT, ERROR, DONE } state;
> +	bool lcore_id_any;
> +	pthread_t id;
> +	unsigned int *registered_count;
> +};
> +static void *thread_loop(void *arg)
> +{

missing an empty line here

> +	struct thread_context *t = arg;
> +	unsigned int lcore_id;
> +
> +	lcore_id = rte_lcore_id();
> +	if (lcore_id != LCORE_ID_ANY) {
> +		printf("Incorrect lcore id for new thread %u\n", lcore_id);
> +		t->state = ERROR;
> +	}
> +	rte_thread_register();
> +	lcore_id = rte_lcore_id();
> +	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> +			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> +		printf("Could not register new thread, got %u while %sexpecting %u\n",
> +			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> +		t->state = ERROR;
> +	}

To check if rte_thread_register() succedeed, we need to look at
lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
id on success, and -1 on error (rte_errno could be set to give some
info on the error).

The same could be done for rte_thread_init()

[...]

> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index a7ae0691bf..1cbddc4b5b 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	pthread_join(*thread, NULL);
>  	return -ret;
>  }
> +
> +void
> +rte_thread_register(void)
> +{
> +	unsigned int lcore_id;
> +	rte_cpuset_t cpuset;
> +
> +	/* EAL init flushes all lcores, we can't register before. */
> +	assert(internal_config.init_complete == 1);
> +	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> +			&cpuset) != 0)
> +		CPU_ZERO(&cpuset);
> +	lcore_id = eal_lcore_non_eal_allocate();
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		lcore_id = LCORE_ID_ANY;
> +	rte_thread_init(lcore_id, &cpuset);
> +	if (lcore_id != LCORE_ID_ANY)
> +		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> +			lcore_id);
> +}

So, in this case, the affinity of the pthread is kept and saved, in other
words there is no link between the lcore id and the affinity. It means we
are allowing an application to register lcores for dataplane with conflicting
affinities.

I wonder if it could be useful to have an API that automatically sets
the affinity according to the lcore_id. Or a function that creates a
pthread using the specified lcore id, and setting the correct affinity.
I could simplify the work for applications that want to create/destroy
dataplane threads dynamically.

This could be done later however, just an idea.

[...]
> diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> index 13e5de006f..32a3d999b8 100644
> --- a/lib/librte_eal/freebsd/eal.c
> +++ b/lib/librte_eal/freebsd/eal.c
> @@ -424,6 +424,10 @@ rte_config_init(void)
>  		}
>  		if (rte_eal_config_reattach() < 0)
>  			return -1;
> +		if (!eal_mcfg_enable_multiprocess()) {
> +			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> +			return -1;
> +		}
>  		eal_mcfg_update_internal();
>  		break;
>  	case RTE_PROC_AUTO:
> diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> index 3968c40693..43747e88df 100644
> --- a/lib/librte_eal/include/rte_lcore.h
> +++ b/lib/librte_eal/include/rte_lcore.h
> @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
>  	ROLE_RTE,
>  	ROLE_OFF,
>  	ROLE_SERVICE,
> +	ROLE_NON_EAL,
>  };

If find the name ROLE_NON_EAL a bit heavy (this was also my impression
when reading the doc part).

I understand that there are several types of threads:

- eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
- unregistered (pthread not created by eal, and not registered): ROLE_OFF
  (note that ROLE_OFF also applies for unexistant threads)
- dynamic: pthread not created by eal, but registered

What about using ROLE_DYN ? I'm not sure about this name either, it's just
to open the discussion :)
David Marchand July 1, 2020, 7:13 a.m. UTC | #3
On Tue, Jun 30, 2020 at 12:07 PM Olivier Matz <olivier.matz@6wind.com> wrote:
>
> On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> > DPDK allows calling some part of its API from a non-EAL thread but this
> > has some limitations.
> > OVS (and other applications) has its own thread management but still
> > want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> > faking EAL threads potentially unknown of some DPDK component.
> >
> > Introduce a new API to register non-EAL thread and associate them to a
> > free lcore with a new NON_EAL role.
> > This role denotes lcores that do not run DPDK mainloop and as such
> > prevents use of rte_eal_wait_lcore() and consorts.
> >
> > Multiprocess is not supported as the need for cohabitation with this new
> > feature is unclear at the moment.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > ---
> > Changes since v2:
> > - refused multiprocess init once rte_thread_register got called, and
> >   vice versa,
> > - added warning on multiprocess in rte_thread_register doxygen,
> >
> > Changes since v1:
> > - moved cleanup on lcore role code in patch 5,
> > - added unit test,
> > - updated documentation,
> > - changed naming from "external thread" to "registered non-EAL thread"
> >
> > ---
> >  MAINTAINERS                                   |   1 +
> >  app/test/Makefile                             |   1 +
> >  app/test/autotest_data.py                     |   6 +
> >  app/test/meson.build                          |   2 +
> >  app/test/test_lcores.c                        | 139 ++++++++++++++++++
> >  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
> >  .../prog_guide/env_abstraction_layer.rst      |  22 +--
> >  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
> >  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
> >  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
> >  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
> >  lib/librte_eal/common/eal_memcfg.h            |  10 ++
> >  lib/librte_eal/common/eal_private.h           |  18 +++
> >  lib/librte_eal/freebsd/eal.c                  |   4 +
> >  lib/librte_eal/include/rte_lcore.h            |  25 +++-
> >  lib/librte_eal/linux/eal.c                    |   4 +
> >  lib/librte_eal/rte_eal_version.map            |   2 +
> >  lib/librte_mempool/rte_mempool.h              |  11 +-
> >  18 files changed, 349 insertions(+), 22 deletions(-)
> >  create mode 100644 app/test/test_lcores.c
> >
>
> [...]
>
> > diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> > new file mode 100644
> > index 0000000000..864bcbade7
> > --- /dev/null
> > +++ b/app/test/test_lcores.c
> > @@ -0,0 +1,139 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright (c) 2020 Red Hat, Inc.
> > + */
> > +
> > +#include <pthread.h>
> > +#include <string.h>
> > +
> > +#include <rte_lcore.h>
> > +
> > +#include "test.h"
> > +
> > +struct thread_context {
> > +     enum { INIT, ERROR, DONE } state;
> > +     bool lcore_id_any;
> > +     pthread_t id;
> > +     unsigned int *registered_count;
> > +};
> > +static void *thread_loop(void *arg)
> > +{
>
> missing an empty line here
>
> > +     struct thread_context *t = arg;
> > +     unsigned int lcore_id;
> > +
> > +     lcore_id = rte_lcore_id();
> > +     if (lcore_id != LCORE_ID_ANY) {
> > +             printf("Incorrect lcore id for new thread %u\n", lcore_id);
> > +             t->state = ERROR;
> > +     }
> > +     rte_thread_register();
> > +     lcore_id = rte_lcore_id();
> > +     if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> > +                     (!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> > +             printf("Could not register new thread, got %u while %sexpecting %u\n",
> > +                     lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> > +             t->state = ERROR;
> > +     }
>
> To check if rte_thread_register() succedeed, we need to look at
> lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
> id on success, and -1 on error (rte_errno could be set to give some
> info on the error).

lcore_id are unsigned integers with the special value LCORE_ID_ANY
mapped to UINT32_MAX (should be UINT_MAX? anyway...).

rte_thread_register could return an error code as there are no ERROR
level logs about why a lcore allocation failed.
We could then distinguish a shortage of lcore (or init callback
refusal) from an invalid call before rte_eal_init() or when mp is in
use.

About returning the lcore_id as part of the return code, this would
map to -1 for LCORE_ID_ANY.
This is probably not a problem but still odd.


>
> The same could be done for rte_thread_init()

?
Not sure where this one could fail.


>
> [...]
>
> > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > index a7ae0691bf..1cbddc4b5b 100644
> > --- a/lib/librte_eal/common/eal_common_thread.c
> > +++ b/lib/librte_eal/common/eal_common_thread.c
> > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> >       pthread_join(*thread, NULL);
> >       return -ret;
> >  }
> > +
> > +void
> > +rte_thread_register(void)
> > +{
> > +     unsigned int lcore_id;
> > +     rte_cpuset_t cpuset;
> > +
> > +     /* EAL init flushes all lcores, we can't register before. */
> > +     assert(internal_config.init_complete == 1);
> > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > +                     &cpuset) != 0)
> > +             CPU_ZERO(&cpuset);
> > +     lcore_id = eal_lcore_non_eal_allocate();
> > +     if (lcore_id >= RTE_MAX_LCORE)
> > +             lcore_id = LCORE_ID_ANY;
> > +     rte_thread_init(lcore_id, &cpuset);
> > +     if (lcore_id != LCORE_ID_ANY)
> > +             RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> > +                     lcore_id);
> > +}
>
> So, in this case, the affinity of the pthread is kept and saved, in other
> words there is no link between the lcore id and the affinity. It means we
> are allowing an application to register lcores for dataplane with conflicting
> affinities.

This is not something new, applications using --lcores option already
live with this.
We have warnings in the documentation about non-EAL threads and about
the dangers of conflicting affinities.
Hopefully, the users of this API know what they are doing since they
chose not to use EAL threads.


>
> I wonder if it could be useful to have an API that automatically sets
> the affinity according to the lcore_id. Or a function that creates a
> pthread using the specified lcore id, and setting the correct affinity.
> I could simplify the work for applications that want to create/destroy
> dataplane threads dynamically.

Do you mean EAL threads dynamic creation/suppression?


>
> This could be done later however, just an idea.

For now, I don't see the need.


>
> [...]
> > diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> > index 13e5de006f..32a3d999b8 100644
> > --- a/lib/librte_eal/freebsd/eal.c
> > +++ b/lib/librte_eal/freebsd/eal.c
> > @@ -424,6 +424,10 @@ rte_config_init(void)
> >               }
> >               if (rte_eal_config_reattach() < 0)
> >                       return -1;
> > +             if (!eal_mcfg_enable_multiprocess()) {
> > +                     RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> > +                     return -1;
> > +             }
> >               eal_mcfg_update_internal();
> >               break;
> >       case RTE_PROC_AUTO:
> > diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> > index 3968c40693..43747e88df 100644
> > --- a/lib/librte_eal/include/rte_lcore.h
> > +++ b/lib/librte_eal/include/rte_lcore.h
> > @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
> >       ROLE_RTE,
> >       ROLE_OFF,
> >       ROLE_SERVICE,
> > +     ROLE_NON_EAL,
> >  };
>
> If find the name ROLE_NON_EAL a bit heavy (this was also my impression
> when reading the doc part).
>
> I understand that there are several types of threads:
>
> - eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
> - unregistered (pthread not created by eal, and not registered): ROLE_OFF
>   (note that ROLE_OFF also applies for unexistant threads)
> - dynamic: pthread not created by eal, but registered

Last two cases both are non-EAL threads as described in the doc so far.


>
> What about using ROLE_DYN ? I'm not sure about this name either, it's just
> to open the discussion :)
>

Well, at the moment, all those new lcores are mapped only to non-EAL threads.
A dynamic role feels like you want to take dynamic EAL threads into
account from the start.
I prefer to stick to non-EAL.
Olivier Matz July 1, 2020, 9:11 a.m. UTC | #4
On Wed, Jul 01, 2020 at 09:13:36AM +0200, David Marchand wrote:
> On Tue, Jun 30, 2020 at 12:07 PM Olivier Matz <olivier.matz@6wind.com> wrote:
> >
> > On Fri, Jun 26, 2020 at 04:47:33PM +0200, David Marchand wrote:
> > > DPDK allows calling some part of its API from a non-EAL thread but this
> > > has some limitations.
> > > OVS (and other applications) has its own thread management but still
> > > want to avoid such limitations by hacking RTE_PER_LCORE(_lcore_id) and
> > > faking EAL threads potentially unknown of some DPDK component.
> > >
> > > Introduce a new API to register non-EAL thread and associate them to a
> > > free lcore with a new NON_EAL role.
> > > This role denotes lcores that do not run DPDK mainloop and as such
> > > prevents use of rte_eal_wait_lcore() and consorts.
> > >
> > > Multiprocess is not supported as the need for cohabitation with this new
> > > feature is unclear at the moment.
> > >
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > Acked-by: Andrew Rybchenko <arybchenko@solarflare.com>
> > > ---
> > > Changes since v2:
> > > - refused multiprocess init once rte_thread_register got called, and
> > >   vice versa,
> > > - added warning on multiprocess in rte_thread_register doxygen,
> > >
> > > Changes since v1:
> > > - moved cleanup on lcore role code in patch 5,
> > > - added unit test,
> > > - updated documentation,
> > > - changed naming from "external thread" to "registered non-EAL thread"
> > >
> > > ---
> > >  MAINTAINERS                                   |   1 +
> > >  app/test/Makefile                             |   1 +
> > >  app/test/autotest_data.py                     |   6 +
> > >  app/test/meson.build                          |   2 +
> > >  app/test/test_lcores.c                        | 139 ++++++++++++++++++
> > >  doc/guides/howto/debug_troubleshoot.rst       |   5 +-
> > >  .../prog_guide/env_abstraction_layer.rst      |  22 +--
> > >  doc/guides/prog_guide/mempool_lib.rst         |   2 +-
> > >  lib/librte_eal/common/eal_common_lcore.c      |  50 ++++++-
> > >  lib/librte_eal/common/eal_common_mcfg.c       |  36 +++++
> > >  lib/librte_eal/common/eal_common_thread.c     |  33 +++++
> > >  lib/librte_eal/common/eal_memcfg.h            |  10 ++
> > >  lib/librte_eal/common/eal_private.h           |  18 +++
> > >  lib/librte_eal/freebsd/eal.c                  |   4 +
> > >  lib/librte_eal/include/rte_lcore.h            |  25 +++-
> > >  lib/librte_eal/linux/eal.c                    |   4 +
> > >  lib/librte_eal/rte_eal_version.map            |   2 +
> > >  lib/librte_mempool/rte_mempool.h              |  11 +-
> > >  18 files changed, 349 insertions(+), 22 deletions(-)
> > >  create mode 100644 app/test/test_lcores.c
> > >
> >
> > [...]
> >
> > > diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
> > > new file mode 100644
> > > index 0000000000..864bcbade7
> > > --- /dev/null
> > > +++ b/app/test/test_lcores.c
> > > @@ -0,0 +1,139 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright (c) 2020 Red Hat, Inc.
> > > + */
> > > +
> > > +#include <pthread.h>
> > > +#include <string.h>
> > > +
> > > +#include <rte_lcore.h>
> > > +
> > > +#include "test.h"
> > > +
> > > +struct thread_context {
> > > +     enum { INIT, ERROR, DONE } state;
> > > +     bool lcore_id_any;
> > > +     pthread_t id;
> > > +     unsigned int *registered_count;
> > > +};
> > > +static void *thread_loop(void *arg)
> > > +{
> >
> > missing an empty line here
> >
> > > +     struct thread_context *t = arg;
> > > +     unsigned int lcore_id;
> > > +
> > > +     lcore_id = rte_lcore_id();
> > > +     if (lcore_id != LCORE_ID_ANY) {
> > > +             printf("Incorrect lcore id for new thread %u\n", lcore_id);
> > > +             t->state = ERROR;
> > > +     }
> > > +     rte_thread_register();
> > > +     lcore_id = rte_lcore_id();
> > > +     if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
> > > +                     (!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
> > > +             printf("Could not register new thread, got %u while %sexpecting %u\n",
> > > +                     lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
> > > +             t->state = ERROR;
> > > +     }
> >
> > To check if rte_thread_register() succedeed, we need to look at
> > lcore_id. I wonder if rte_thread_register() shouldn't return the lcore
> > id on success, and -1 on error (rte_errno could be set to give some
> > info on the error).
> 
> lcore_id are unsigned integers with the special value LCORE_ID_ANY
> mapped to UINT32_MAX (should be UINT_MAX? anyway...).
> 
> rte_thread_register could return an error code as there are no ERROR
> level logs about why a lcore allocation failed.
> We could then distinguish a shortage of lcore (or init callback
> refusal) from an invalid call before rte_eal_init() or when mp is in
> use.
> 
> About returning the lcore_id as part of the return code, this would
> map to -1 for LCORE_ID_ANY.
> This is probably not a problem but still odd.

Yes, it would be a bit odd like this. What about changing the definition of
LCORE_ID_ANY to ((unsigned int)-1) ? I think it does not change the effective
value on any architecture, but would make the above change clearer.


> >
> > The same could be done for rte_thread_init()
> 
> ?
> Not sure where this one could fail.

I was thinking about __rte_trace_mem_per_thread_alloc(), but maybe it's not
needed.

> >
> > [...]
> >
> > > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> > > index a7ae0691bf..1cbddc4b5b 100644
> > > --- a/lib/librte_eal/common/eal_common_thread.c
> > > +++ b/lib/librte_eal/common/eal_common_thread.c
> > > @@ -236,3 +236,36 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> > >       pthread_join(*thread, NULL);
> > >       return -ret;
> > >  }
> > > +
> > > +void
> > > +rte_thread_register(void)
> > > +{
> > > +     unsigned int lcore_id;
> > > +     rte_cpuset_t cpuset;
> > > +
> > > +     /* EAL init flushes all lcores, we can't register before. */
> > > +     assert(internal_config.init_complete == 1);
> > > +     if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
> > > +                     &cpuset) != 0)
> > > +             CPU_ZERO(&cpuset);
> > > +     lcore_id = eal_lcore_non_eal_allocate();
> > > +     if (lcore_id >= RTE_MAX_LCORE)
> > > +             lcore_id = LCORE_ID_ANY;
> > > +     rte_thread_init(lcore_id, &cpuset);
> > > +     if (lcore_id != LCORE_ID_ANY)
> > > +             RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
> > > +                     lcore_id);
> > > +}
> >
> > So, in this case, the affinity of the pthread is kept and saved, in other
> > words there is no link between the lcore id and the affinity. It means we
> > are allowing an application to register lcores for dataplane with conflicting
> > affinities.
> 
> This is not something new, applications using --lcores option already
> live with this.
> We have warnings in the documentation about non-EAL threads and about
> the dangers of conflicting affinities.
> Hopefully, the users of this API know what they are doing since they
> chose not to use EAL threads.
> 
> 
> >
> > I wonder if it could be useful to have an API that automatically sets
> > the affinity according to the lcore_id. Or a function that creates a
> > pthread using the specified lcore id, and setting the correct affinity.
> > I could simplify the work for applications that want to create/destroy
> > dataplane threads dynamically.
> 
> Do you mean EAL threads dynamic creation/suppression?
> 
> 
> >
> > This could be done later however, just an idea.
> 
> For now, I don't see the need.
> 
> 
> >
> > [...]
> > > diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
> > > index 13e5de006f..32a3d999b8 100644
> > > --- a/lib/librte_eal/freebsd/eal.c
> > > +++ b/lib/librte_eal/freebsd/eal.c
> > > @@ -424,6 +424,10 @@ rte_config_init(void)
> > >               }
> > >               if (rte_eal_config_reattach() < 0)
> > >                       return -1;
> > > +             if (!eal_mcfg_enable_multiprocess()) {
> > > +                     RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
> > > +                     return -1;
> > > +             }
> > >               eal_mcfg_update_internal();
> > >               break;
> > >       case RTE_PROC_AUTO:
> > > diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
> > > index 3968c40693..43747e88df 100644
> > > --- a/lib/librte_eal/include/rte_lcore.h
> > > +++ b/lib/librte_eal/include/rte_lcore.h
> > > @@ -31,6 +31,7 @@ enum rte_lcore_role_t {
> > >       ROLE_RTE,
> > >       ROLE_OFF,
> > >       ROLE_SERVICE,
> > > +     ROLE_NON_EAL,
> > >  };
> >
> > If find the name ROLE_NON_EAL a bit heavy (this was also my impression
> > when reading the doc part).
> >
> > I understand that there are several types of threads:
> >
> > - eal (pthread created by eal): ROLE_RTE and ROLE_SERVICE
> > - unregistered (pthread not created by eal, and not registered): ROLE_OFF
> >   (note that ROLE_OFF also applies for unexistant threads)
> > - dynamic: pthread not created by eal, but registered
> 
> Last two cases both are non-EAL threads as described in the doc so far.

Yes, but only the last case has the NON_EAL role.
I feel that currently role != thread_type, so may be a bit confusing to
use a pthread_type name in the role enum.

> 
> 
> >
> > What about using ROLE_DYN ? I'm not sure about this name either, it's just
> > to open the discussion :)
> >
> 
> Well, at the moment, all those new lcores are mapped only to non-EAL threads.
> A dynamic role feels like you want to take dynamic EAL threads into
> account from the start.
> I prefer to stick to non-EAL.
> 
> 
> -- 
> David Marchand
>

Patch
diff mbox series

diff --git a/MAINTAINERS b/MAINTAINERS
index 45b6c3a990..88c6d85e07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -182,6 +182,7 @@  F: app/test/test_cycles.c
 F: app/test/test_debug.c
 F: app/test/test_eal*
 F: app/test/test_errno.c
+F: app/test/test_lcores.c
 F: app/test/test_logs.c
 F: app/test/test_memcpy*
 F: app/test/test_per_lcore.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 7b96a03a64..4a8dea2425 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -97,6 +97,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += test_flow_classify.c
 endif
 
 SRCS-y += test_rwlock.c
+SRCS-y += test_lcores.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack.c
 SRCS-$(CONFIG_RTE_LIBRTE_STACK) += test_stack_perf.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 238ab631b4..4b7da45e09 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -62,6 +62,12 @@ 
         "Func":    rwlock_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Lcores autotest",
+        "Command": "lcores_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Logs autotest",
         "Command": "logs_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index 5233ead46e..a57477b7cc 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -67,6 +67,7 @@  test_sources = files('commands.c',
 	'test_ipsec_perf.c',
 	'test_kni.c',
 	'test_kvargs.c',
+	'test_lcores.c',
 	'test_logs.c',
 	'test_lpm.c',
 	'test_lpm6.c',
@@ -206,6 +207,7 @@  fast_tests = [
         ['hash_autotest', true],
         ['interrupt_autotest', true],
         ['ipfrag_autotest', false],
+        ['lcores_autotest', true],
         ['logs_autotest', true],
         ['lpm_autotest', true],
         ['lpm6_autotest', true],
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
new file mode 100644
index 0000000000..864bcbade7
--- /dev/null
+++ b/app/test/test_lcores.c
@@ -0,0 +1,139 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Red Hat, Inc.
+ */
+
+#include <pthread.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#include "test.h"
+
+struct thread_context {
+	enum { INIT, ERROR, DONE } state;
+	bool lcore_id_any;
+	pthread_t id;
+	unsigned int *registered_count;
+};
+static void *thread_loop(void *arg)
+{
+	struct thread_context *t = arg;
+	unsigned int lcore_id;
+
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Incorrect lcore id for new thread %u\n", lcore_id);
+		t->state = ERROR;
+	}
+	rte_thread_register();
+	lcore_id = rte_lcore_id();
+	if ((t->lcore_id_any && lcore_id != LCORE_ID_ANY) ||
+			(!t->lcore_id_any && lcore_id == LCORE_ID_ANY)) {
+		printf("Could not register new thread, got %u while %sexpecting %u\n",
+			lcore_id, t->lcore_id_any ? "" : "not ", LCORE_ID_ANY);
+		t->state = ERROR;
+	}
+	/* Report register happened to the control thread. */
+	__atomic_add_fetch(t->registered_count, 1, __ATOMIC_RELEASE);
+
+	/* Wait for release from the control thread. */
+	while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+		;
+	rte_thread_unregister();
+	lcore_id = rte_lcore_id();
+	if (lcore_id != LCORE_ID_ANY) {
+		printf("Could not unregister new thread, %u still assigned\n",
+			lcore_id);
+		t->state = ERROR;
+	}
+
+	if (t->state != ERROR)
+		t->state = DONE;
+
+	return NULL;
+}
+
+static int
+test_non_eal_lcores(unsigned int eal_threads_count)
+{
+	struct thread_context thread_contexts[RTE_MAX_LCORE];
+	unsigned int non_eal_threads_count;
+	unsigned int registered_count;
+	struct thread_context *t;
+	unsigned int i;
+	int ret;
+
+	non_eal_threads_count = 0;
+	registered_count = 0;
+
+	/* Try to create as many threads as possible. */
+	for (i = 0; i < RTE_MAX_LCORE - eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		t->state = INIT;
+		t->registered_count = &registered_count;
+		t->lcore_id_any = false;
+		if (pthread_create(&t->id, NULL, thread_loop, t) != 0)
+			break;
+		non_eal_threads_count++;
+	}
+	printf("non-EAL threads count: %u\n", non_eal_threads_count);
+	/* Wait all non-EAL threads to register. */
+	while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+			non_eal_threads_count)
+		;
+
+	/* We managed to create the max number of threads, let's try to create
+	 * one more. This will allow one more check.
+	 */
+	if (eal_threads_count + non_eal_threads_count < RTE_MAX_LCORE)
+		goto skip_lcore_any;
+	t = &thread_contexts[non_eal_threads_count];
+	t->state = INIT;
+	t->registered_count = &registered_count;
+	t->lcore_id_any = true;
+	if (pthread_create(&t->id, NULL, thread_loop, t) == 0) {
+		non_eal_threads_count++;
+		printf("non-EAL threads count: %u\n", non_eal_threads_count);
+		while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+				non_eal_threads_count)
+			;
+	}
+
+skip_lcore_any:
+	/* Release all threads, and check their states. */
+	__atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+	ret = 0;
+	for (i = 0; i < non_eal_threads_count; i++) {
+		t = &thread_contexts[i];
+		pthread_join(t->id, NULL);
+		if (t->state != DONE)
+			ret = -1;
+	}
+
+	return ret;
+}
+
+static int
+test_lcores(void)
+{
+	unsigned int eal_threads_count = 0;
+	unsigned int i;
+
+	for (i = 0; i < RTE_MAX_LCORE; i++) {
+		if (!rte_lcore_has_role(i, ROLE_OFF))
+			eal_threads_count++;
+	}
+	if (eal_threads_count == 0) {
+		printf("Something is broken, no EAL thread detected.\n");
+		return TEST_FAILED;
+	}
+	printf("EAL threads count: %u, RTE_MAX_LCORE=%u\n", eal_threads_count,
+		RTE_MAX_LCORE);
+
+	if (test_non_eal_lcores(eal_threads_count) < 0)
+		return TEST_FAILED;
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(lcores_autotest, test_lcores);
diff --git a/doc/guides/howto/debug_troubleshoot.rst b/doc/guides/howto/debug_troubleshoot.rst
index cef016b2fe..5a46f5fba3 100644
--- a/doc/guides/howto/debug_troubleshoot.rst
+++ b/doc/guides/howto/debug_troubleshoot.rst
@@ -307,8 +307,9 @@  Custom worker function :numref:`dtg_distributor_worker`.
 
 #. Configuration issue isolation
 
-   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF and
-     SERVICE. Check performance functions are mapped to run on the cores.
+   * Identify core role using ``rte_eal_lcore_role`` to identify RTE, OFF,
+     SERVICE and NON_EAL. Check performance functions are mapped to run on the
+     cores.
 
    * For high-performance execution logic ensure running it on correct NUMA
      and non-master core.
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 48a2fec066..f64ae953d1 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -564,9 +564,13 @@  It's also compatible with the pattern of corelist('-l') option.
 non-EAL pthread support
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-It is possible to use the DPDK execution context with any user pthread (aka. Non-EAL pthreads).
-In a non-EAL pthread, the *_lcore_id* is always LCORE_ID_ANY which identifies that it is not an EAL thread with a valid, unique, *_lcore_id*.
-Some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
+It is possible to use the DPDK execution context with any user pthread (aka. non-EAL pthreads).
+There are two kinds of non-EAL pthreads:
+
+- a registered non-EAL pthread with a valid *_lcore_id* that was successfully assigned by calling ``rte_thread_register()``,
+- a non registered non-EAL pthread with a LCORE_ID_ANY,
+
+For non registered non-EAL pthread (with a LCORE_ID_ANY *_lcore_id*), some libraries will use an alternative unique ID (e.g. TID), some will not be impacted at all, and some will work but with limitations (e.g. timer and mempool libraries).
 
 All these impacts are mentioned in :ref:`known_issue_label` section.
 
@@ -613,9 +617,9 @@  Known Issues
 + rte_mempool
 
   The rte_mempool uses a per-lcore cache inside the mempool.
-  For non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
-  So for now, when rte_mempool is used with non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
-  Only user-owned external caches can be used in a non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
+  For unregistered non-EAL pthreads, ``rte_lcore_id()`` will not return a valid number.
+  So for now, when rte_mempool is used with unregistered non-EAL pthreads, the put/get operations will bypass the default mempool cache and there is a performance penalty because of this bypass.
+  Only user-owned external caches can be used in an unregistered non-EAL context in conjunction with ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()`` that accept an explicit cache parameter.
 
 + rte_ring
 
@@ -660,15 +664,15 @@  Known Issues
 
 + rte_timer
 
-  Running  ``rte_timer_manage()`` on a non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
+  Running  ``rte_timer_manage()`` on an unregistered non-EAL pthread is not allowed. However, resetting/stopping the timer from a non-EAL pthread is allowed.
 
 + rte_log
 
-  In non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
+  In unregistered non-EAL pthreads, there is no per thread loglevel and logtype, global loglevels are used.
 
 + misc
 
-  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in a non-EAL pthread.
+  The debug statistics of rte_ring, rte_mempool and rte_timer are not supported in an unregistered non-EAL pthread.
 
 cgroup control
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/prog_guide/mempool_lib.rst b/doc/guides/prog_guide/mempool_lib.rst
index f8b430d656..e3e1f940be 100644
--- a/doc/guides/prog_guide/mempool_lib.rst
+++ b/doc/guides/prog_guide/mempool_lib.rst
@@ -103,7 +103,7 @@  The maximum size of the cache is static and is defined at compilation time (CONF
 Alternatively to the internal default per-lcore local cache, an application can create and manage external caches through the ``rte_mempool_cache_create()``, ``rte_mempool_cache_free()`` and ``rte_mempool_cache_flush()`` calls.
 These user-owned caches can be explicitly passed to ``rte_mempool_generic_put()`` and ``rte_mempool_generic_get()``.
 The ``rte_mempool_default_cache()`` call returns the default internal cache if any.
-In contrast to the default caches, user-owned caches can be used by non-EAL threads too.
+In contrast to the default caches, user-owned caches can be used by unregistered non-EAL threads too.
 
 Mempool Handlers
 ------------------------
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 86d32a3dd7..a61824a779 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -6,13 +6,15 @@ 
 #include <limits.h>
 #include <string.h>
 
-#include <rte_errno.h>
-#include <rte_log.h>
-#include <rte_eal.h>
-#include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
 
+#include "eal_memcfg.h"
 #include "eal_private.h"
 #include "eal_thread.h"
 
@@ -220,3 +222,43 @@  rte_socket_id_by_idx(unsigned int idx)
 	}
 	return config->numa_nodes[idx];
 }
+
+static rte_spinlock_t lcore_lock = RTE_SPINLOCK_INITIALIZER;
+
+unsigned int
+eal_lcore_non_eal_allocate(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned int lcore_id;
+
+	if (cfg->process_type == RTE_PROC_SECONDARY ||
+			!eal_mcfg_forbid_multiprocess()) {
+		RTE_LOG(ERR, EAL, "Multiprocess in use, cannot allocate new lcore.\n");
+		return RTE_MAX_LCORE;
+	}
+	rte_spinlock_lock(&lcore_lock);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+			continue;
+		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_count++;
+		break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		RTE_LOG(DEBUG, EAL, "No lcore available.\n");
+	rte_spinlock_unlock(&lcore_lock);
+	return lcore_id;
+}
+
+void
+eal_lcore_non_eal_release(unsigned int lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	rte_spinlock_lock(&lcore_lock);
+	if (cfg->lcore_role[lcore_id] == ROLE_NON_EAL) {
+		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_count--;
+	}
+	rte_spinlock_unlock(&lcore_lock);
+}
diff --git a/lib/librte_eal/common/eal_common_mcfg.c b/lib/librte_eal/common/eal_common_mcfg.c
index 49d3ed0ce5..5b42d454e2 100644
--- a/lib/librte_eal/common/eal_common_mcfg.c
+++ b/lib/librte_eal/common/eal_common_mcfg.c
@@ -44,6 +44,42 @@  eal_mcfg_check_version(void)
 	return 0;
 }
 
+enum mp_status {
+	MP_UNKNOWN,
+	MP_FORBIDDEN,
+	MP_ENABLED,
+};
+
+static bool
+eal_mcfg_set_mp_status(enum mp_status status)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	uint8_t expected;
+	uint8_t desired;
+
+	expected = MP_UNKNOWN;
+	desired = status;
+	if (__atomic_compare_exchange_n(&mcfg->mp_status, &expected, desired,
+			false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+		return true;
+
+	return __atomic_load_n(&mcfg->mp_status, __ATOMIC_RELAXED) == desired;
+}
+
+bool
+eal_mcfg_forbid_multiprocess(void)
+{
+	assert(rte_eal_get_configuration()->process_type == RTE_PROC_PRIMARY);
+	return eal_mcfg_set_mp_status(MP_FORBIDDEN);
+}
+
+bool
+eal_mcfg_enable_multiprocess(void)
+{
+	assert(rte_eal_get_configuration()->process_type == RTE_PROC_SECONDARY);
+	return eal_mcfg_set_mp_status(MP_ENABLED);
+}
+
 void
 eal_mcfg_update_internal(void)
 {
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index a7ae0691bf..1cbddc4b5b 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -236,3 +236,36 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	pthread_join(*thread, NULL);
 	return -ret;
 }
+
+void
+rte_thread_register(void)
+{
+	unsigned int lcore_id;
+	rte_cpuset_t cpuset;
+
+	/* EAL init flushes all lcores, we can't register before. */
+	assert(internal_config.init_complete == 1);
+	if (pthread_getaffinity_np(pthread_self(), sizeof(cpuset),
+			&cpuset) != 0)
+		CPU_ZERO(&cpuset);
+	lcore_id = eal_lcore_non_eal_allocate();
+	if (lcore_id >= RTE_MAX_LCORE)
+		lcore_id = LCORE_ID_ANY;
+	rte_thread_init(lcore_id, &cpuset);
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Registered non-EAL thread as lcore %u.\n",
+			lcore_id);
+}
+
+void
+rte_thread_unregister(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+
+	if (lcore_id != LCORE_ID_ANY)
+		eal_lcore_non_eal_release(lcore_id);
+	rte_thread_uninit();
+	if (lcore_id != LCORE_ID_ANY)
+		RTE_LOG(DEBUG, EAL, "Unregistered non-EAL thread (was lcore %u).\n",
+			lcore_id);
+}
diff --git a/lib/librte_eal/common/eal_memcfg.h b/lib/librte_eal/common/eal_memcfg.h
index 583fcb5953..340e523c6a 100644
--- a/lib/librte_eal/common/eal_memcfg.h
+++ b/lib/librte_eal/common/eal_memcfg.h
@@ -41,6 +41,8 @@  struct rte_mem_config {
 	rte_rwlock_t memory_hotplug_lock;
 	/**< Indicates whether memory hotplug request is in progress. */
 
+	uint8_t mp_status; /**< Indicates whether multiprocess can be used. */
+
 	/* memory segments and zones */
 	struct rte_fbarray memzones; /**< Memzone descriptors. */
 
@@ -91,6 +93,14 @@  eal_mcfg_wait_complete(void);
 int
 eal_mcfg_check_version(void);
 
+/* mark primary process as not supporting multi-process. */
+bool
+eal_mcfg_forbid_multiprocess(void);
+
+/* instruct primary process that a secondary process attached once. */
+bool
+eal_mcfg_enable_multiprocess(void);
+
 /* set mem config as complete */
 void
 eal_mcfg_complete(void);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0592fcd694..73238ff157 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -396,6 +396,24 @@  uint64_t get_tsc_freq(void);
  */
 uint64_t get_tsc_freq_arch(void);
 
+/**
+ * Allocate a free lcore to associate to a non-EAL thread.
+ *
+ * @return
+ *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - RTE_MAX_LCORE if none was available.
+ */
+unsigned int eal_lcore_non_eal_allocate(void);
+
+/**
+ * Release the lcore used by a non-EAL thread.
+ * Counterpart of eal_lcore_non_eal_allocate().
+ *
+ * @param lcore_id
+ *   The lcore with role ROLE_NON_EAL to release.
+ */
+void eal_lcore_non_eal_release(unsigned int lcore_id);
+
 /**
  * Prepare physical memory mapping
  * i.e. hugepages on Linux and
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 13e5de006f..32a3d999b8 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -424,6 +424,10 @@  rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_mcfg_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h
index 3968c40693..43747e88df 100644
--- a/lib/librte_eal/include/rte_lcore.h
+++ b/lib/librte_eal/include/rte_lcore.h
@@ -31,6 +31,7 @@  enum rte_lcore_role_t {
 	ROLE_RTE,
 	ROLE_OFF,
 	ROLE_SERVICE,
+	ROLE_NON_EAL,
 };
 
 /**
@@ -67,7 +68,8 @@  rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
  *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
  *
  * @return
- *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ *  Logical core ID (in EAL thread or registered non-EAL thread) or
+ *  LCORE_ID_ANY (in unregistered non-EAL thread)
  */
 static inline unsigned
 rte_lcore_id(void)
@@ -279,6 +281,27 @@  int rte_thread_setname(pthread_t id, const char *name);
 __rte_experimental
 int rte_thread_getname(pthread_t id, char *name, size_t len);
 
+/**
+ * Register current non-EAL thread as a lcore.
+ *
+ * @note This API is not compatible with the multi-process feature:
+ * - if a primary process registers a non-EAL thread, then no secondary process
+ *   will initialise.
+ * - if a secondary process initialises successfully, trying to register a
+ *   non-EAL thread from either primary or secondary processes will always end
+ *   up with the thread getting LCORE_ID_ANY as lcore.
+ */
+__rte_experimental
+void
+rte_thread_register(void);
+
+/**
+ * Unregister current thread and release lcore if one was associated.
+ */
+__rte_experimental
+void
+rte_thread_unregister(void);
+
 /**
  * Create a control thread.
  *
diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
index 8894cea50a..1d90d1c0e3 100644
--- a/lib/librte_eal/linux/eal.c
+++ b/lib/librte_eal/linux/eal.c
@@ -514,6 +514,10 @@  rte_config_init(void)
 		}
 		if (rte_eal_config_reattach() < 0)
 			return -1;
+		if (!eal_mcfg_enable_multiprocess()) {
+			RTE_LOG(ERR, EAL, "Primary process refused secondary attachment\n");
+			return -1;
+		}
 		eal_mcfg_update_internal();
 		break;
 	case RTE_PROC_AUTO:
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 5831eea4b0..39c41d445d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -396,6 +396,8 @@  EXPERIMENTAL {
 
 	# added in 20.08
 	__rte_trace_mem_per_thread_free;
+	rte_thread_register;
+	rte_thread_unregister;
 };
 
 INTERNAL {
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 652d19f9f1..9e0ee052b3 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -28,9 +28,9 @@ 
  * rte_mempool_get() or rte_mempool_put() are designed to be called from an EAL
  * thread due to the internal per-lcore cache. Due to the lack of caching,
  * rte_mempool_get() or rte_mempool_put() performance will suffer when called
- * by non-EAL threads. Instead, non-EAL threads should call
- * rte_mempool_generic_get() or rte_mempool_generic_put() with a user cache
- * created with rte_mempool_cache_create().
+ * by unregistered non-EAL threads. Instead, unregistered non-EAL threads
+ * should call rte_mempool_generic_get() or rte_mempool_generic_put() with a
+ * user cache created with rte_mempool_cache_create().
  */
 
 #include <stdio.h>
@@ -1233,7 +1233,7 @@  void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
 /**
  * Create a user-owned mempool cache.
  *
- * This can be used by non-EAL threads to enable caching when they
+ * This can be used by unregistered non-EAL threads to enable caching when they
  * interact with a mempool.
  *
  * @param size
@@ -1264,7 +1264,8 @@  rte_mempool_cache_free(struct rte_mempool_cache *cache);
  * @param lcore_id
  *   The logical core id.
  * @return
- *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ *   A pointer to the mempool cache or NULL if disabled or unregistered non-EAL
+ *   thread.
  */
 static __rte_always_inline struct rte_mempool_cache *
 rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)