[dpdk-dev,RFC,1/7] eal: add linear thread id as pthread-local variable
Commit Message
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_eal/common/include/rte_eal.h | 5 ++
lib/librte_eal/common/include/rte_lcore.h | 12 ++++
lib/librte_eal/linuxapp/eal/eal_thread.c | 115 ++++++++++++++++++++++++++++--
3 files changed, 126 insertions(+), 6 deletions(-)
Comments
On 12/11/2014 10:05 AM, Cunming Liang wrote:
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> lib/librte_eal/common/include/rte_eal.h | 5 ++
> lib/librte_eal/common/include/rte_lcore.h | 12 ++++
> lib/librte_eal/linuxapp/eal/eal_thread.c | 115 ++++++++++++++++++++++++++++--
> 3 files changed, 126 insertions(+), 6 deletions(-)
>
> diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
> index f4ecd2e..2640167 100644
> --- a/lib/librte_eal/common/include/rte_eal.h
> +++ b/lib/librte_eal/common/include/rte_eal.h
> @@ -262,6 +262,11 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func );
> */
> int rte_eal_has_hugepages(void);
>
> +#ifndef RTE_MAX_THREAD
> +#define RTE_MAX_THREAD RTE_MAX_LCORE
> +#endif
> +
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
> index 49b2c03..cd83d47 100644
> --- a/lib/librte_eal/common/include/rte_lcore.h
> +++ b/lib/librte_eal/common/include/rte_lcore.h
> @@ -73,6 +73,7 @@ struct lcore_config {
> extern struct lcore_config lcore_config[RTE_MAX_LCORE];
>
> RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per core "core id". */
> +RTE_DECLARE_PER_LCORE(unsigned, _thread_id); /**< Per thread "linear tid". */
>
> /**
> * Return the ID of the execution unit we are running on.
> @@ -86,6 +87,17 @@ rte_lcore_id(void)
> }
>
> /**
> + * Return the linear thread ID of the cache unit we are running on.
> + * @return
> + * core ID
> + */
> +static inline unsigned long
> +rte_linear_thread_id(void)
> +{
> + return RTE_PER_LCORE(_thread_id);
> +}
> +
> +/**
> * Get the id of the master lcore
> *
> * @return
> diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
> index 80a985f..52478d6 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
> @@ -39,6 +39,7 @@
> #include <pthread.h>
> #include <sched.h>
> #include <sys/queue.h>
> +#include <string.h>
>
> #include <rte_debug.h>
> #include <rte_atomic.h>
> @@ -51,12 +52,19 @@
> #include <rte_eal.h>
> #include <rte_per_lcore.h>
> #include <rte_lcore.h>
> +#include <rte_spinlock.h>
> +#include <rte_common.h>
>
> #include "eal_private.h"
> #include "eal_thread.h"
>
> +#define LINEAR_THREAD_ID_POOL "THREAD_ID_POOL"
> +
> RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);
>
> +/* define linear thread id as thread-local variables */
> +RTE_DEFINE_PER_LCORE(unsigned, _thread_id);
> +
> /*
> * Send a message to a slave lcore identified by slave_id to call a
> * function f with argument arg. Once the execution is done, the
> @@ -94,12 +102,13 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
> return 0;
> }
>
> +
> /* set affinity for current thread */
> static int
> -eal_thread_set_affinity(void)
> +__eal_thread_set_affinity(pthread_t thread, unsigned lcore)
> {
> +
> int s;
> - pthread_t thread;
>
> /*
> * According to the section VERSIONS of the CPU_ALLOC man page:
> @@ -126,9 +135,8 @@ eal_thread_set_affinity(void)
>
> size = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
> CPU_ZERO_S(size, cpusetp);
> - CPU_SET_S(rte_lcore_id(), size, cpusetp);
> + CPU_SET_S(lcore, size, cpusetp);
>
> - thread = pthread_self();
> s = pthread_setaffinity_np(thread, size, cpusetp);
> if (s != 0) {
> RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> @@ -140,9 +148,8 @@ eal_thread_set_affinity(void)
> #else /* CPU_ALLOC */
> cpu_set_t cpuset;
> CPU_ZERO( &cpuset );
> - CPU_SET( rte_lcore_id(), &cpuset );
> + CPU_SET(lcore, &cpuset );
>
> - thread = pthread_self();
> s = pthread_setaffinity_np(thread, sizeof( cpuset ), &cpuset);
> if (s != 0) {
> RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> @@ -152,6 +159,15 @@ eal_thread_set_affinity(void)
> return 0;
> }
>
> +/* set affinity for current thread */
> +static int
> +eal_thread_set_affinity(void)
> +{
> + pthread_t thread = pthread_self();
> +
> + return __eal_thread_set_affinity(thread, rte_lcore_id());
> +}
> +
> void eal_thread_init_master(unsigned lcore_id)
> {
> /* set the lcore ID in per-lcore memory area */
> @@ -162,6 +178,87 @@ void eal_thread_init_master(unsigned lcore_id)
> rte_panic("cannot set affinity\n");
> }
>
> +/* linear thread id control block */
> +struct eal_thread_cb {
> + rte_spinlock_t lock;
> + uint64_t nb_bucket;
> + uint64_t bitmap[0];
> +};
> +
Can this struct be declared in a header file?
> +static struct eal_thread_cb *
> +__create_tid_pool(void)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint64_t sz;
> + uint64_t nb_bucket;
> +
> + nb_bucket = RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) / 64;
Is it better to replace the division with a right shift?
nb_bucket = RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) >> 6;
> + sz = sizeof(*pcb) + nb_bucket * sizeof(uint64_t);
> + mz = rte_memzone_reserve(LINEAR_THREAD_ID_POOL,
> + sz, rte_socket_id(), 0);
> + if (mz == NULL)
> + rte_panic("Cannot allocate linear thread ID pool\n");
> +
> + pcb = mz->addr;
> + rte_spinlock_init(&pcb->lock);
> + pcb->nb_bucket = nb_bucket;
> + memset(pcb->bitmap, 0, nb_bucket * sizeof(uint64_t));
> +
> + return pcb;
> +}
> +
> +static int
> +__get_linear_tid(uint64_t *tid)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint64_t i;
> + uint8_t shift = 0;
> +
> + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> + if (mz != NULL)
> + pcb = mz->addr;
> + else
> + pcb = __create_tid_pool();
> +
> + rte_spinlock_lock(&pcb->lock);
> + for (i = 0; i < pcb->nb_bucket; i++) {
> + if (pcb->bitmap[i] == (uint64_t)-1)
It is better to use ~0 (or ~(uint64_t)0) instead of (uint64_t)-1
to represent the all-bits-set value.
> + continue;
> + shift = 0;
> + while (pcb->bitmap[i] & (1UL << shift))
> + shift ++;
> + pcb->bitmap[i] |= (1UL << shift);
> + break;
> + }
> + rte_spinlock_unlock(&pcb->lock);
> +
> + if (i == pcb->nb_bucket)
> + return -1;
> +
> + *tid = i * 64 + shift;
> + return 0;
> +}
> +
> +static void __rte_unused
> +__put_linear_tid(uint64_t tid)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint8_t shift;
> +
> + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> + if (!mz)
> + return;
> +
> + pcb = mz->addr;
> + rte_spinlock_lock(&pcb->lock);
> + shift = tid & 0x3F;
> + pcb->bitmap[tid / 64] &= ~(1UL << shift);
Likewise, tid / 64 can be replaced with tid >> 6.
> + rte_spinlock_unlock(&pcb->lock);
> +}
> +
> /* main loop of threads */
> __attribute__((noreturn)) void *
> eal_thread_loop(__attribute__((unused)) void *arg)
> @@ -169,6 +266,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> char c;
> int n, ret;
> unsigned lcore_id;
> + unsigned long ltid = 0;
> pthread_t thread_id;
> int m2s, s2m;
>
> @@ -191,6 +289,11 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> /* set the lcore ID in per-lcore memory area */
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> + /* set the linear thread ID in per-lcore memory area */
> + if (__get_linear_tid(<id) < 0)
> + rte_panic("cannot get cache slot id\n");
> + RTE_PER_LCORE(_thread_id) = ltid;
> +
> /* set CPU affinity */
> if (eal_thread_set_affinity() < 0)
> rte_panic("cannot set affinity\n");
Hi Steve,
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Cunming Liang
> Sent: Thursday, December 11, 2014 2:05 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [RFC PATCH 1/7] eal: add linear thread id as pthread-local variable
>
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> lib/librte_eal/common/include/rte_eal.h | 5 ++
> lib/librte_eal/common/include/rte_lcore.h | 12 ++++
> lib/librte_eal/linuxapp/eal/eal_thread.c | 115 ++++++++++++++++++++++++++++--
> 3 files changed, 126 insertions(+), 6 deletions(-)
>
> diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
> index f4ecd2e..2640167 100644
> --- a/lib/librte_eal/common/include/rte_eal.h
> +++ b/lib/librte_eal/common/include/rte_eal.h
> @@ -262,6 +262,11 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func );
> */
> int rte_eal_has_hugepages(void);
>
> +#ifndef RTE_MAX_THREAD
> +#define RTE_MAX_THREAD RTE_MAX_LCORE
> +#endif
> +
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
> index 49b2c03..cd83d47 100644
> --- a/lib/librte_eal/common/include/rte_lcore.h
> +++ b/lib/librte_eal/common/include/rte_lcore.h
> @@ -73,6 +73,7 @@ struct lcore_config {
> extern struct lcore_config lcore_config[RTE_MAX_LCORE];
>
> RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per core "core id". */
> +RTE_DECLARE_PER_LCORE(unsigned, _thread_id); /**< Per thread "linear tid". */
>
> /**
> * Return the ID of the execution unit we are running on.
> @@ -86,6 +87,17 @@ rte_lcore_id(void)
> }
>
> /**
> + * Return the linear thread ID of the cache unit we are running on.
> + * @return
> + * core ID
> + */
> +static inline unsigned long
> +rte_linear_thread_id(void)
> +{
> + return RTE_PER_LCORE(_thread_id);
> +}
> +
> +/**
> * Get the id of the master lcore
> *
> * @return
> diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
> index 80a985f..52478d6 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
> @@ -39,6 +39,7 @@
> #include <pthread.h>
> #include <sched.h>
> #include <sys/queue.h>
> +#include <string.h>
>
> #include <rte_debug.h>
> #include <rte_atomic.h>
> @@ -51,12 +52,19 @@
> #include <rte_eal.h>
> #include <rte_per_lcore.h>
> #include <rte_lcore.h>
> +#include <rte_spinlock.h>
> +#include <rte_common.h>
>
> #include "eal_private.h"
> #include "eal_thread.h"
>
> +#define LINEAR_THREAD_ID_POOL "THREAD_ID_POOL"
> +
> RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);
>
> +/* define linear thread id as thread-local variables */
> +RTE_DEFINE_PER_LCORE(unsigned, _thread_id);
> +
> /*
> * Send a message to a slave lcore identified by slave_id to call a
> * function f with argument arg. Once the execution is done, the
> @@ -94,12 +102,13 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
> return 0;
> }
>
> +
> /* set affinity for current thread */
> static int
> -eal_thread_set_affinity(void)
> +__eal_thread_set_affinity(pthread_t thread, unsigned lcore)
> {
> +
> int s;
> - pthread_t thread;
>
> /*
> * According to the section VERSIONS of the CPU_ALLOC man page:
> @@ -126,9 +135,8 @@ eal_thread_set_affinity(void)
>
> size = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
> CPU_ZERO_S(size, cpusetp);
> - CPU_SET_S(rte_lcore_id(), size, cpusetp);
> + CPU_SET_S(lcore, size, cpusetp);
>
> - thread = pthread_self();
> s = pthread_setaffinity_np(thread, size, cpusetp);
> if (s != 0) {
> RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> @@ -140,9 +148,8 @@ eal_thread_set_affinity(void)
> #else /* CPU_ALLOC */
> cpu_set_t cpuset;
> CPU_ZERO( &cpuset );
> - CPU_SET( rte_lcore_id(), &cpuset );
> + CPU_SET(lcore, &cpuset );
>
> - thread = pthread_self();
> s = pthread_setaffinity_np(thread, sizeof( cpuset ), &cpuset);
> if (s != 0) {
> RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> @@ -152,6 +159,15 @@ eal_thread_set_affinity(void)
> return 0;
> }
>
> +/* set affinity for current thread */
> +static int
> +eal_thread_set_affinity(void)
> +{
> + pthread_t thread = pthread_self();
> +
> + return __eal_thread_set_affinity(thread, rte_lcore_id());
> +}
> +
> void eal_thread_init_master(unsigned lcore_id)
> {
> /* set the lcore ID in per-lcore memory area */
> @@ -162,6 +178,87 @@ void eal_thread_init_master(unsigned lcore_id)
> rte_panic("cannot set affinity\n");
> }
>
> +/* linear thread id control block */
> +struct eal_thread_cb {
> + rte_spinlock_t lock;
> + uint64_t nb_bucket;
> + uint64_t bitmap[0];
> +};
> +
> +static struct eal_thread_cb *
> +__create_tid_pool(void)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint64_t sz;
> + uint64_t nb_bucket;
> +
> + nb_bucket = RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) / 64;
> + sz = sizeof(*pcb) + nb_bucket * sizeof(uint64_t);
> + mz = rte_memzone_reserve(LINEAR_THREAD_ID_POOL,
> + sz, rte_socket_id(), 0);
> + if (mz == NULL)
> + rte_panic("Cannot allocate linear thread ID pool\n");
> +
> + pcb = mz->addr;
> + rte_spinlock_init(&pcb->lock);
> + pcb->nb_bucket = nb_bucket;
> + memset(pcb->bitmap, 0, nb_bucket * sizeof(uint64_t));
> +
> + return pcb;
> +}
> +
> +static int
> +__get_linear_tid(uint64_t *tid)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint64_t i;
> + uint8_t shift = 0;
> +
> + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> + if (mz != NULL)
> + pcb = mz->addr;
> + else
> + pcb = __create_tid_pool();
As I understand it, __get_linear_tid() could be called concurrently from different threads?
If so, then I think we can have a race condition here between memzone_lookup and memzone_create.
Probably the easiest way to avoid it is to make sure that __create_tid_pool() is called at startup,
while the app is still single-threaded and secondary processes are still waiting for the primary.
Something like: create rte_eal_tid_init() and call it somewhere in rte_eal_init(), before rte_eal_mcfg_complete().
Konstantin
> +
> + rte_spinlock_lock(&pcb->lock);
> + for (i = 0; i < pcb->nb_bucket; i++) {
> + if (pcb->bitmap[i] == (uint64_t)-1)
> + continue;
> + shift = 0;
> + while (pcb->bitmap[i] & (1UL << shift))
> + shift ++;
> + pcb->bitmap[i] |= (1UL << shift);
> + break;
> + }
> + rte_spinlock_unlock(&pcb->lock);
> +
> + if (i == pcb->nb_bucket)
> + return -1;
> +
> + *tid = i * 64 + shift;
> + return 0;
> +}
> +
> +static void __rte_unused
> +__put_linear_tid(uint64_t tid)
> +{
> + const struct rte_memzone *mz;
> + struct eal_thread_cb *pcb;
> + uint8_t shift;
> +
> + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> + if (!mz)
> + return;
> +
> + pcb = mz->addr;
> + rte_spinlock_lock(&pcb->lock);
> + shift = tid & 0x3F;
> + pcb->bitmap[tid / 64] &= ~(1UL << shift);
> + rte_spinlock_unlock(&pcb->lock);
> +}
> +
> /* main loop of threads */
> __attribute__((noreturn)) void *
> eal_thread_loop(__attribute__((unused)) void *arg)
> @@ -169,6 +266,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> char c;
> int n, ret;
> unsigned lcore_id;
> + unsigned long ltid = 0;
> pthread_t thread_id;
> int m2s, s2m;
>
> @@ -191,6 +289,11 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> /* set the lcore ID in per-lcore memory area */
> RTE_PER_LCORE(_lcore_id) = lcore_id;
>
> + /* set the linear thread ID in per-lcore memory area */
> + if (__get_linear_tid(<id) < 0)
> + rte_panic("cannot get cache slot id\n");
> + RTE_PER_LCORE(_thread_id) = ltid;
> +
> /* set CPU affinity */
> if (eal_thread_set_affinity() < 0)
> rte_panic("cannot set affinity\n");
> --
> 1.8.1.4
Thanks Konstantin, it makes sense.
> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Tuesday, December 23, 2014 3:02 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: RE: [dpdk-dev] [RFC PATCH 1/7] eal: add linear thread id as pthread-local
> variable
>
> Hi Steve,
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Cunming Liang
> > Sent: Thursday, December 11, 2014 2:05 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [RFC PATCH 1/7] eal: add linear thread id as pthread-local
> variable
> >
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> > lib/librte_eal/common/include/rte_eal.h | 5 ++
> > lib/librte_eal/common/include/rte_lcore.h | 12 ++++
> > lib/librte_eal/linuxapp/eal/eal_thread.c | 115
> ++++++++++++++++++++++++++++--
> > 3 files changed, 126 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_eal.h
> b/lib/librte_eal/common/include/rte_eal.h
> > index f4ecd2e..2640167 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -262,6 +262,11 @@ rte_set_application_usage_hook( rte_usage_hook_t
> usage_func );
> > */
> > int rte_eal_has_hugepages(void);
> >
> > +#ifndef RTE_MAX_THREAD
> > +#define RTE_MAX_THREAD RTE_MAX_LCORE
> > +#endif
> > +
> > +
> > #ifdef __cplusplus
> > }
> > #endif
> > diff --git a/lib/librte_eal/common/include/rte_lcore.h
> b/lib/librte_eal/common/include/rte_lcore.h
> > index 49b2c03..cd83d47 100644
> > --- a/lib/librte_eal/common/include/rte_lcore.h
> > +++ b/lib/librte_eal/common/include/rte_lcore.h
> > @@ -73,6 +73,7 @@ struct lcore_config {
> > extern struct lcore_config lcore_config[RTE_MAX_LCORE];
> >
> > RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per core "core id". */
> > +RTE_DECLARE_PER_LCORE(unsigned, _thread_id); /**< Per thread "linear tid".
> */
> >
> > /**
> > * Return the ID of the execution unit we are running on.
> > @@ -86,6 +87,17 @@ rte_lcore_id(void)
> > }
> >
> > /**
> > + * Return the linear thread ID of the cache unit we are running on.
> > + * @return
> > + * core ID
> > + */
> > +static inline unsigned long
> > +rte_linear_thread_id(void)
> > +{
> > + return RTE_PER_LCORE(_thread_id);
> > +}
> > +
> > +/**
> > * Get the id of the master lcore
> > *
> > * @return
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c
> b/lib/librte_eal/linuxapp/eal/eal_thread.c
> > index 80a985f..52478d6 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
> > @@ -39,6 +39,7 @@
> > #include <pthread.h>
> > #include <sched.h>
> > #include <sys/queue.h>
> > +#include <string.h>
> >
> > #include <rte_debug.h>
> > #include <rte_atomic.h>
> > @@ -51,12 +52,19 @@
> > #include <rte_eal.h>
> > #include <rte_per_lcore.h>
> > #include <rte_lcore.h>
> > +#include <rte_spinlock.h>
> > +#include <rte_common.h>
> >
> > #include "eal_private.h"
> > #include "eal_thread.h"
> >
> > +#define LINEAR_THREAD_ID_POOL "THREAD_ID_POOL"
> > +
> > RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);
> >
> > +/* define linear thread id as thread-local variables */
> > +RTE_DEFINE_PER_LCORE(unsigned, _thread_id);
> > +
> > /*
> > * Send a message to a slave lcore identified by slave_id to call a
> > * function f with argument arg. Once the execution is done, the
> > @@ -94,12 +102,13 @@ rte_eal_remote_launch(int (*f)(void *), void *arg,
> unsigned slave_id)
> > return 0;
> > }
> >
> > +
> > /* set affinity for current thread */
> > static int
> > -eal_thread_set_affinity(void)
> > +__eal_thread_set_affinity(pthread_t thread, unsigned lcore)
> > {
> > +
> > int s;
> > - pthread_t thread;
> >
> > /*
> > * According to the section VERSIONS of the CPU_ALLOC man page:
> > @@ -126,9 +135,8 @@ eal_thread_set_affinity(void)
> >
> > size = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
> > CPU_ZERO_S(size, cpusetp);
> > - CPU_SET_S(rte_lcore_id(), size, cpusetp);
> > + CPU_SET_S(lcore, size, cpusetp);
> >
> > - thread = pthread_self();
> > s = pthread_setaffinity_np(thread, size, cpusetp);
> > if (s != 0) {
> > RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> > @@ -140,9 +148,8 @@ eal_thread_set_affinity(void)
> > #else /* CPU_ALLOC */
> > cpu_set_t cpuset;
> > CPU_ZERO( &cpuset );
> > - CPU_SET( rte_lcore_id(), &cpuset );
> > + CPU_SET(lcore, &cpuset );
> >
> > - thread = pthread_self();
> > s = pthread_setaffinity_np(thread, sizeof( cpuset ), &cpuset);
> > if (s != 0) {
> > RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
> > @@ -152,6 +159,15 @@ eal_thread_set_affinity(void)
> > return 0;
> > }
> >
> > +/* set affinity for current thread */
> > +static int
> > +eal_thread_set_affinity(void)
> > +{
> > + pthread_t thread = pthread_self();
> > +
> > + return __eal_thread_set_affinity(thread, rte_lcore_id());
> > +}
> > +
> > void eal_thread_init_master(unsigned lcore_id)
> > {
> > /* set the lcore ID in per-lcore memory area */
> > @@ -162,6 +178,87 @@ void eal_thread_init_master(unsigned lcore_id)
> > rte_panic("cannot set affinity\n");
> > }
> >
> > +/* linear thread id control block */
> > +struct eal_thread_cb {
> > + rte_spinlock_t lock;
> > + uint64_t nb_bucket;
> > + uint64_t bitmap[0];
> > +};
> > +
> > +static struct eal_thread_cb *
> > +__create_tid_pool(void)
> > +{
> > + const struct rte_memzone *mz;
> > + struct eal_thread_cb *pcb;
> > + uint64_t sz;
> > + uint64_t nb_bucket;
> > +
> > + nb_bucket = RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) / 64;
> > + sz = sizeof(*pcb) + nb_bucket * sizeof(uint64_t);
> > + mz = rte_memzone_reserve(LINEAR_THREAD_ID_POOL,
> > + sz, rte_socket_id(), 0);
> > + if (mz == NULL)
> > + rte_panic("Cannot allocate linear thread ID pool\n");
> > +
> > + pcb = mz->addr;
> > + rte_spinlock_init(&pcb->lock);
> > + pcb->nb_bucket = nb_bucket;
> > + memset(pcb->bitmap, 0, nb_bucket * sizeof(uint64_t));
> > +
> > + return pcb;
> > +}
> > +
> > +static int
> > +__get_linear_tid(uint64_t *tid)
> > +{
> > + const struct rte_memzone *mz;
> > + struct eal_thread_cb *pcb;
> > + uint64_t i;
> > + uint8_t shift = 0;
> > +
> > + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> > + if (mz != NULL)
> > + pcb = mz->addr;
> > + else
> > + pcb = __create_tid_pool();
>
>
> As I understand, __get_linear_tid() could be call concurrently from different
> threads?
> If so, then I think we can have a race conditions here with
> memzone_lookup/memzone_create.
> Probably the easiest way to avoid it - make sure that __create_tid_pool() will be
> called at startup,
> when app is still single-threaded and secondary processes are still waiting for
> primary.
> Something like create: rte_eal_tid_init() and call it somewhere in rte_eal_init(),
> before rte_eal_mcfg_complete().
> Konstantin
>
> > +
> > + rte_spinlock_lock(&pcb->lock);
> > + for (i = 0; i < pcb->nb_bucket; i++) {
> > + if (pcb->bitmap[i] == (uint64_t)-1)
> > + continue;
> > + shift = 0;
> > + while (pcb->bitmap[i] & (1UL << shift))
> > + shift ++;
> > + pcb->bitmap[i] |= (1UL << shift);
> > + break;
> > + }
> > + rte_spinlock_unlock(&pcb->lock);
> > +
> > + if (i == pcb->nb_bucket)
> > + return -1;
> > +
> > + *tid = i * 64 + shift;
> > + return 0;
> > +}
> > +
> > +static void __rte_unused
> > +__put_linear_tid(uint64_t tid)
> > +{
> > + const struct rte_memzone *mz;
> > + struct eal_thread_cb *pcb;
> > + uint8_t shift;
> > +
> > + mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
> > + if (!mz)
> > + return;
> > +
> > + pcb = mz->addr;
> > + rte_spinlock_lock(&pcb->lock);
> > + shift = tid & 0x3F;
> > + pcb->bitmap[tid / 64] &= ~(1UL << shift);
> > + rte_spinlock_unlock(&pcb->lock);
> > +}
> > +
> > /* main loop of threads */
> > __attribute__((noreturn)) void *
> > eal_thread_loop(__attribute__((unused)) void *arg)
> > @@ -169,6 +266,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> > char c;
> > int n, ret;
> > unsigned lcore_id;
> > + unsigned long ltid = 0;
> > pthread_t thread_id;
> > int m2s, s2m;
> >
> > @@ -191,6 +289,11 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> > /* set the lcore ID in per-lcore memory area */
> > RTE_PER_LCORE(_lcore_id) = lcore_id;
> >
> > + /* set the linear thread ID in per-lcore memory area */
> > + if (__get_linear_tid(<id) < 0)
> > + rte_panic("cannot get cache slot id\n");
> > + RTE_PER_LCORE(_thread_id) = ltid;
> > +
> > /* set CPU affinity */
> > if (eal_thread_set_affinity() < 0)
> > rte_panic("cannot set affinity\n");
> > --
> > 1.8.1.4
@@ -262,6 +262,11 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func );
*/
int rte_eal_has_hugepages(void);
+#ifndef RTE_MAX_THREAD
+#define RTE_MAX_THREAD RTE_MAX_LCORE
+#endif
+
+
#ifdef __cplusplus
}
#endif
@@ -73,6 +73,7 @@ struct lcore_config {
extern struct lcore_config lcore_config[RTE_MAX_LCORE];
RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per core "core id". */
+RTE_DECLARE_PER_LCORE(unsigned, _thread_id); /**< Per thread "linear tid". */
/**
* Return the ID of the execution unit we are running on.
@@ -86,6 +87,17 @@ rte_lcore_id(void)
}
/**
+ * Return the linear thread ID of the cache unit we are running on.
+ * @return
+ * core ID
+ */
+static inline unsigned long
+rte_linear_thread_id(void)
+{
+ return RTE_PER_LCORE(_thread_id);
+}
+
+/**
* Get the id of the master lcore
*
* @return
@@ -39,6 +39,7 @@
#include <pthread.h>
#include <sched.h>
#include <sys/queue.h>
+#include <string.h>
#include <rte_debug.h>
#include <rte_atomic.h>
@@ -51,12 +52,19 @@
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_common.h>
#include "eal_private.h"
#include "eal_thread.h"
+#define LINEAR_THREAD_ID_POOL "THREAD_ID_POOL"
+
RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);
+/* define linear thread id as thread-local variables */
+RTE_DEFINE_PER_LCORE(unsigned, _thread_id);
+
/*
* Send a message to a slave lcore identified by slave_id to call a
* function f with argument arg. Once the execution is done, the
@@ -94,12 +102,13 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
return 0;
}
+
/* set affinity for current thread */
static int
-eal_thread_set_affinity(void)
+__eal_thread_set_affinity(pthread_t thread, unsigned lcore)
{
+
int s;
- pthread_t thread;
/*
* According to the section VERSIONS of the CPU_ALLOC man page:
@@ -126,9 +135,8 @@ eal_thread_set_affinity(void)
size = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
CPU_ZERO_S(size, cpusetp);
- CPU_SET_S(rte_lcore_id(), size, cpusetp);
+ CPU_SET_S(lcore, size, cpusetp);
- thread = pthread_self();
s = pthread_setaffinity_np(thread, size, cpusetp);
if (s != 0) {
RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
@@ -140,9 +148,8 @@ eal_thread_set_affinity(void)
#else /* CPU_ALLOC */
cpu_set_t cpuset;
CPU_ZERO( &cpuset );
- CPU_SET( rte_lcore_id(), &cpuset );
+ CPU_SET(lcore, &cpuset );
- thread = pthread_self();
s = pthread_setaffinity_np(thread, sizeof( cpuset ), &cpuset);
if (s != 0) {
RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
@@ -152,6 +159,15 @@ eal_thread_set_affinity(void)
return 0;
}
+/* set affinity for current thread */
+static int
+eal_thread_set_affinity(void)
+{
+ pthread_t thread = pthread_self();
+
+ return __eal_thread_set_affinity(thread, rte_lcore_id());
+}
+
void eal_thread_init_master(unsigned lcore_id)
{
/* set the lcore ID in per-lcore memory area */
@@ -162,6 +178,87 @@ void eal_thread_init_master(unsigned lcore_id)
rte_panic("cannot set affinity\n");
}
+/* linear thread id control block */
+struct eal_thread_cb {
+ rte_spinlock_t lock;
+ uint64_t nb_bucket;
+ uint64_t bitmap[0];
+};
+
+static struct eal_thread_cb *
+__create_tid_pool(void)
+{
+ const struct rte_memzone *mz;
+ struct eal_thread_cb *pcb;
+ uint64_t sz;
+ uint64_t nb_bucket;
+
+ nb_bucket = RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) / 64;
+ sz = sizeof(*pcb) + nb_bucket * sizeof(uint64_t);
+ mz = rte_memzone_reserve(LINEAR_THREAD_ID_POOL,
+ sz, rte_socket_id(), 0);
+ if (mz == NULL)
+ rte_panic("Cannot allocate linear thread ID pool\n");
+
+ pcb = mz->addr;
+ rte_spinlock_init(&pcb->lock);
+ pcb->nb_bucket = nb_bucket;
+ memset(pcb->bitmap, 0, nb_bucket * sizeof(uint64_t));
+
+ return pcb;
+}
+
+static int
+__get_linear_tid(uint64_t *tid)
+{
+ const struct rte_memzone *mz;
+ struct eal_thread_cb *pcb;
+ uint64_t i;
+ uint8_t shift = 0;
+
+ mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
+ if (mz != NULL)
+ pcb = mz->addr;
+ else
+ pcb = __create_tid_pool();
+
+ rte_spinlock_lock(&pcb->lock);
+ for (i = 0; i < pcb->nb_bucket; i++) {
+ if (pcb->bitmap[i] == (uint64_t)-1)
+ continue;
+ shift = 0;
+ while (pcb->bitmap[i] & (1UL << shift))
+ shift ++;
+ pcb->bitmap[i] |= (1UL << shift);
+ break;
+ }
+ rte_spinlock_unlock(&pcb->lock);
+
+ if (i == pcb->nb_bucket)
+ return -1;
+
+ *tid = i * 64 + shift;
+ return 0;
+}
+
+static void __rte_unused
+__put_linear_tid(uint64_t tid)
+{
+ const struct rte_memzone *mz;
+ struct eal_thread_cb *pcb;
+ uint8_t shift;
+
+ mz = rte_memzone_lookup(LINEAR_THREAD_ID_POOL);
+ if (!mz)
+ return;
+
+ pcb = mz->addr;
+ rte_spinlock_lock(&pcb->lock);
+ shift = tid & 0x3F;
+ pcb->bitmap[tid / 64] &= ~(1UL << shift);
+ rte_spinlock_unlock(&pcb->lock);
+}
+
/* main loop of threads */
__attribute__((noreturn)) void *
eal_thread_loop(__attribute__((unused)) void *arg)
@@ -169,6 +266,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
char c;
int n, ret;
unsigned lcore_id;
+ unsigned long ltid = 0;
pthread_t thread_id;
int m2s, s2m;
@@ -191,6 +289,11 @@ eal_thread_loop(__attribute__((unused)) void *arg)
/* set the lcore ID in per-lcore memory area */
RTE_PER_LCORE(_lcore_id) = lcore_id;
+ /* set the linear thread ID in per-lcore memory area */
+ if (__get_linear_tid(<id) < 0)
+ rte_panic("cannot get cache slot id\n");
+ RTE_PER_LCORE(_thread_id) = ltid;
+
/* set CPU affinity */
if (eal_thread_set_affinity() < 0)
rte_panic("cannot set affinity\n");