[v3] eal: fix race in ctrl thread creation
Checks
Commit Message
The creation of control threads used a pthread barrier for
synchronization. This patch fixes a race condition where the pthread
barrier could get destroyed while one of the threads has not yet
returned from the pthread_barrier_wait function, which could result in
undefined behaviour. The barrier has been completely removed in favour
of a reference count on the control thread parameters struct.
Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
Cc: jianfeng.tan@intel.com
Cc: stable@dpdk.org
Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
---
Hi Olivier,
Hi Honnappa,
Thanks for your input Honnappa. I've made the changes to completely
remove the barrier. However, I didn't move the call to
pthread_setaffinity_np to the control thread; I think we want to report
the result of that function to the caller of rte_ctrl_thread_create and
doing so from ctrl_thread_init would be a lot trickier.
Olivier, what do you think of these changes?
lib/librte_eal/common/eal_common_thread.c | 35 ++++++++---------------
1 file changed, 12 insertions(+), 23 deletions(-)
Comments
<snip>
>
> The creation of control threads used a pthread barrier for synchronization.
> This patch fixes a race condition where the pthread barrier could get
> destroyed while one of the threads has not yet returned from the
> pthread_barrier_wait function, which could result in undefined behaviour.
> The barrier has been completely removed in favour of a reference count on
> the control thread parameters struct.
>
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> ---
>
> Hi Olivier,
> Hi Honnappa,
>
> Thanks for your input Honnappa. I've made the changes to completely
> remove the barrier. However, I didn't move the call to pthread_setaffinity_np
I looked at the rte_ctrl_thread_create API definition. I am not sure we have much leeway here, as the API definition bakes in calling pthread_setaffinity_np.
With the barrier, the control thread did not execute its function until it was moved to the assigned core. If we remove the barrier and do not move the pthread_setaffinity_np call to ctrl_thread_init, the behavior might change.
One more comment below on how we are handling the setaffinity failure.
> to the control thread; I think we want to report the result of that function to
> the caller of rte_ctrl_thread_create and doing so from ctrl_thread_init would
> be a lot trickier.
>
> Olivier, what do you think of these changes?
>
> lib/librte_eal/common/eal_common_thread.c | 35 ++++++++---------------
> 1 file changed, 12 insertions(+), 23 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_thread.c
> b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902..2421066f9 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -169,12 +169,11 @@ __rte_thread_uninit(void) struct
> rte_thread_ctrl_params {
> void *(*start_routine)(void *);
> void *arg;
> - pthread_barrier_t configured;
> + unsigned int refcnt;
> };
>
> static void *ctrl_thread_init(void *arg) {
> - int ret;
> struct internal_config *internal_conf =
> eal_get_internal_configuration();
> rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11
> +183,8 @@ static void *ctrl_thread_init(void *arg)
>
> __rte_thread_init(rte_lcore_id(), cpuset);
>
> - ret = pthread_barrier_wait(&params->configured);
> - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> - pthread_barrier_destroy(&params->configured);
> + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0)
> free(params);
> - }
>
> return start_routine(routine_arg);
> }
> @@ -210,14 +206,11 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
>
> params->start_routine = start_routine;
> params->arg = arg;
> -
> - pthread_barrier_init(&params->configured, NULL, 2);
> + params->refcnt = 2;
>
> ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> - if (ret != 0) {
> - free(params);
> - return -ret;
> - }
> + if (ret != 0)
> + goto fail;
>
> if (name != NULL) {
> ret = rte_thread_setname(*thread, name); @@ -228,24
> +221,20 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>
> ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> if (ret)
> - goto fail;
> + goto fail_cancel;
>
> - ret = pthread_barrier_wait(&params->configured);
> - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> - pthread_barrier_destroy(&params->configured);
> + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0)
> free(params);
> - }
>
> return 0;
>
> -fail:
> - if (PTHREAD_BARRIER_SERIAL_THREAD ==
> - pthread_barrier_wait(&params->configured)) {
> - pthread_barrier_destroy(&params->configured);
> - free(params);
> - }
> +fail_cancel:
> pthread_cancel(*thread);
For the control thread to be able to respond to pthread_cancel, it needs to call one of the library functions that are considered cancellation points. I do not see such a requirement in the API definition.
> pthread_join(*thread, NULL);
> +
> +fail:
> + free(params);
> +
> return -ret;
> }
>
> --
> 2.25.1
@@ -169,12 +169,11 @@ __rte_thread_uninit(void)
struct rte_thread_ctrl_params {
void *(*start_routine)(void *);
void *arg;
- pthread_barrier_t configured;
+ unsigned int refcnt;
};
static void *ctrl_thread_init(void *arg)
{
- int ret;
struct internal_config *internal_conf =
eal_get_internal_configuration();
rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
@@ -184,11 +183,8 @@ static void *ctrl_thread_init(void *arg)
__rte_thread_init(rte_lcore_id(), cpuset);
- ret = pthread_barrier_wait(&params->configured);
- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
- pthread_barrier_destroy(&params->configured);
+ if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0)
free(params);
- }
return start_routine(routine_arg);
}
@@ -210,14 +206,11 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
params->start_routine = start_routine;
params->arg = arg;
-
- pthread_barrier_init(&params->configured, NULL, 2);
+ params->refcnt = 2;
ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
- if (ret != 0) {
- free(params);
- return -ret;
- }
+ if (ret != 0)
+ goto fail;
if (name != NULL) {
ret = rte_thread_setname(*thread, name);
@@ -228,24 +221,20 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
if (ret)
- goto fail;
+ goto fail_cancel;
- ret = pthread_barrier_wait(&params->configured);
- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
- pthread_barrier_destroy(&params->configured);
+ if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0)
free(params);
- }
return 0;
-fail:
- if (PTHREAD_BARRIER_SERIAL_THREAD ==
- pthread_barrier_wait(&params->configured)) {
- pthread_barrier_destroy(&params->configured);
- free(params);
- }
+fail_cancel:
pthread_cancel(*thread);
pthread_join(*thread, NULL);
+
+fail:
+ free(params);
+
return -ret;
}