[v5] eal: fix race in ctrl thread creation
Checks
Commit Message
The creation of control threads uses a pthread barrier for
synchronization. This patch fixes a race condition where the pthread
barrier could get destroyed while one of the threads has not yet
returned from the pthread_barrier_wait function, which could result in
undefined behaviour.
Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
Cc: jianfeng.tan@intel.com
Cc: stable@dpdk.org
Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
---
Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------
1 file changed, 25 insertions(+), 27 deletions(-)
Comments
Not directly related to this patch, but can someone please explain why
Patchwork is creating a new series every time I post a new version of
the patch to this thread? I must be doing something wrong but I don't
know what it is. I have been using --in-reply-to with git send-email
but that's apparently not enough. Maybe I'm missing something but I
see several items in Patchwork for this single thread when, IIUC, it
should only be one.
Thank you.
Hi Luc,
On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote:
> The creation of control threads uses a pthread barrier for
> synchronization. This patch fixes a race condition where the pthread
> barrier could get destroyed while one of the threads has not yet
> returned from the pthread_barrier_wait function, which could result in
> undefined behaviour.
>
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> ---
>
> Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
>
> lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------
> 1 file changed, 25 insertions(+), 27 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902..c1044e795 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
> void *(*start_routine)(void *);
> void *arg;
> pthread_barrier_t configured;
> + unsigned int refcnt;
> };
>
> +static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> +{
> + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
> + pthread_barrier_destroy(&params->configured);
> + free(params);
> + }
> +}
> +
> static void *ctrl_thread_init(void *arg)
> {
> - int ret;
> struct internal_config *internal_conf =
> eal_get_internal_configuration();
> rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg)
>
> __rte_thread_init(rte_lcore_id(), cpuset);
>
> - ret = pthread_barrier_wait(&params->configured);
> - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> - pthread_barrier_destroy(&params->configured);
> - free(params);
> - }
> + pthread_barrier_wait(&params->configured);
> + ctrl_params_free(params);
>
> return start_routine(routine_arg);
> }
> @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>
> params->start_routine = start_routine;
> params->arg = arg;
> + params->refcnt = 2;
>
> - pthread_barrier_init(&params->configured, NULL, 2);
> + ret = pthread_barrier_init(&params->configured, NULL, 2);
> + if (ret != 0)
> + goto fail_no_barrier;
>
> ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> - if (ret != 0) {
> - free(params);
> - return -ret;
> - }
> + if (ret != 0)
> + goto fail_with_barrier;
>
> if (name != NULL) {
> ret = rte_thread_setname(*thread, name);
> @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> }
>
> ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> - if (ret)
> - goto fail;
> + pthread_barrier_wait(&params->configured);
> + ctrl_params_free(params);
>
> - ret = pthread_barrier_wait(&params->configured);
> - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> - pthread_barrier_destroy(&params->configured);
> - free(params);
> - }
> + return -ret;
I think not killing the thread when pthread_setaffinity_np() returns an
error is not very understandable from the API user point of view.
What about doing this on top of your patch? The idea is to set
start_routine to NULL before the barrier if pthread_setaffinity_np()
failed. So there is no need to cancel the thread, it will exit by
itself.
@@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg)
eal_get_internal_configuration();
rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
struct rte_thread_ctrl_params *params = arg;
- void *(*start_routine)(void *) = params->start_routine;
+ void *(*start_routine)(void *);
void *routine_arg = params->arg;
__rte_thread_init(rte_lcore_id(), cpuset);
pthread_barrier_wait(&params->configured);
+ start_routine = params->start_routine;
ctrl_params_free(params);
+ if (start_routine == NULL)
+ return NULL;
+
return start_routine(routine_arg);
}
@@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
}
ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
+ if (ret != 0)
+ params->start_routine = NULL;
+
pthread_barrier_wait(&params->configured);
ctrl_params_free(params);
- return -ret;
+ if (ret != 0) {
+ pthread_join(*thread, NULL);
+ return -ret;
+ }
+
+ return 0;
fail_with_barrier:
pthread_barrier_destroy(&params->configured);
Regards,
Olivier
<snip>
>
> Hi Luc,
>
> On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote:
> > The creation of control threads uses a pthread barrier for
> > synchronization. This patch fixes a race condition where the pthread
> > barrier could get destroyed while one of the threads has not yet
> > returned from the pthread_barrier_wait function, which could result in
> > undefined behaviour.
> >
> > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread
> > creation")
> > Cc: jianfeng.tan@intel.com
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> > ---
> >
> > Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
> >
> > lib/librte_eal/common/eal_common_thread.c | 52
> > +++++++++++------------
> > 1 file changed, 25 insertions(+), 27 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/eal_common_thread.c
> > b/lib/librte_eal/common/eal_common_thread.c
> > index 73a055902..c1044e795 100644
> > --- a/lib/librte_eal/common/eal_common_thread.c
> > +++ b/lib/librte_eal/common/eal_common_thread.c
> > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
> > void *(*start_routine)(void *);
> > void *arg;
> > pthread_barrier_t configured;
> > + unsigned int refcnt;
> > };
> >
> > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) {
> > + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0) {
> > + pthread_barrier_destroy(&params->configured);
> > + free(params);
> > + }
> > +}
> > +
> > static void *ctrl_thread_init(void *arg) {
> > - int ret;
> > struct internal_config *internal_conf =
> > eal_get_internal_configuration();
> > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11
> > +192,8 @@ static void *ctrl_thread_init(void *arg)
> >
> > __rte_thread_init(rte_lcore_id(), cpuset);
> >
> > - ret = pthread_barrier_wait(&params->configured);
> > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> > - pthread_barrier_destroy(&params->configured);
> > - free(params);
> > - }
> > + pthread_barrier_wait(&params->configured);
> > + ctrl_params_free(params);
> >
> > return start_routine(routine_arg);
> > }
> > @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const
> > char *name,
> >
> > params->start_routine = start_routine;
> > params->arg = arg;
> > + params->refcnt = 2;
> >
> > - pthread_barrier_init(&params->configured, NULL, 2);
> > + ret = pthread_barrier_init(&params->configured, NULL, 2);
> > + if (ret != 0)
> > + goto fail_no_barrier;
> >
> > ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> > - if (ret != 0) {
> > - free(params);
> > - return -ret;
> > - }
> > + if (ret != 0)
> > + goto fail_with_barrier;
> >
> > if (name != NULL) {
> > ret = rte_thread_setname(*thread, name); @@ -227,25
> +233,17 @@
> > rte_ctrl_thread_create(pthread_t *thread, const char *name,
> > }
> >
> > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> > - if (ret)
> > - goto fail;
> > + pthread_barrier_wait(&params->configured);
> > + ctrl_params_free(params);
> >
> > - ret = pthread_barrier_wait(&params->configured);
> > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> > - pthread_barrier_destroy(&params->configured);
> > - free(params);
> > - }
> > + return -ret;
>
> I think not killing the thread when pthread_setaffinity_np() returns an error is
> not very understandable from the API user point of view.
Agree.
>
> What about doing this on top of your patch? The idea is to set start_routine
> to NULL before the barrier if pthread_setaffinity_np() failed. So there is no
> need to cancel the thread, it will exit by itself.
How about using the pthread_attr_setaffinity_np API?
This deviates from the documentation of 'rte_ctrl_thread_create', but from the user's perspective the behavior should not change.
This way we do not have to handle the error after the thread is launched.
>
> @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg)
> eal_get_internal_configuration();
> rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> struct rte_thread_ctrl_params *params = arg;
> - void *(*start_routine)(void *) = params->start_routine;
> + void *(*start_routine)(void *);
> void *routine_arg = params->arg;
>
> __rte_thread_init(rte_lcore_id(), cpuset);
>
> pthread_barrier_wait(&params->configured);
> + start_routine = params->start_routine;
> ctrl_params_free(params);
>
> + if (start_routine == NULL)
> + return NULL;
> +
> return start_routine(routine_arg);
> }
>
> @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
> }
>
> ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> + if (ret != 0)
> + params->start_routine = NULL;
> +
> pthread_barrier_wait(&params->configured);
> ctrl_params_free(params);
>
> - return -ret;
> + if (ret != 0) {
> + pthread_join(*thread, NULL);
> + return -ret;
> + }
> +
> + return 0;
>
> fail_with_barrier:
> pthread_barrier_destroy(&params->configured);
>
>
> Regards,
> Olivier
@@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
void *(*start_routine)(void *);
void *arg;
pthread_barrier_t configured;
+ unsigned int refcnt;
};
+static void ctrl_params_free(struct rte_thread_ctrl_params *params)
+{
+ if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+}
+
static void *ctrl_thread_init(void *arg)
{
- int ret;
struct internal_config *internal_conf =
eal_get_internal_configuration();
rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
@@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg)
__rte_thread_init(rte_lcore_id(), cpuset);
- ret = pthread_barrier_wait(&params->configured);
- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
- pthread_barrier_destroy(&params->configured);
- free(params);
- }
+ pthread_barrier_wait(&params->configured);
+ ctrl_params_free(params);
return start_routine(routine_arg);
}
@@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
params->start_routine = start_routine;
params->arg = arg;
+ params->refcnt = 2;
- pthread_barrier_init(&params->configured, NULL, 2);
+ ret = pthread_barrier_init(&params->configured, NULL, 2);
+ if (ret != 0)
+ goto fail_no_barrier;
ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
- if (ret != 0) {
- free(params);
- return -ret;
- }
+ if (ret != 0)
+ goto fail_with_barrier;
if (name != NULL) {
ret = rte_thread_setname(*thread, name);
@@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
}
ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
- if (ret)
- goto fail;
+ pthread_barrier_wait(&params->configured);
+ ctrl_params_free(params);
- ret = pthread_barrier_wait(&params->configured);
- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
- pthread_barrier_destroy(&params->configured);
- free(params);
- }
+ return -ret;
- return 0;
+fail_with_barrier:
+ pthread_barrier_destroy(&params->configured);
+
+fail_no_barrier:
+ free(params);
-fail:
- if (PTHREAD_BARRIER_SERIAL_THREAD ==
- pthread_barrier_wait(&params->configured)) {
- pthread_barrier_destroy(&params->configured);
- free(params);
- }
- pthread_cancel(*thread);
- pthread_join(*thread, NULL);
return -ret;
}