Message ID | 20210407125322.124327-1-lucp.at.work@gmail.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | David Marchand |
Headers | show |
Series | [v5] eal: fix race in ctrl thread creation | expand |
Context | Check | Description |
---|---|---|
ci/intel-Testing | success | Testing PASS |
ci/iol-testing | success | Testing PASS |
ci/iol-abi-testing | success | Testing PASS |
ci/iol-mellanox-Performance | success | Performance Testing PASS |
ci/iol-intel-Performance | success | Performance Testing PASS |
ci/Intel-compilation | success | Compilation OK |
ci/github-robot | success | github build: passed |
ci/travis-robot | success | travis build: passed |
ci/checkpatch | success | coding style OK |
Not directly related to this patch, but can someone please explain why Patchwork is creating a new series every time I post a new version of the patch to this thread? I must be doing something wrong, but I don't know what it is. I have been using --in-reply-to with git send-email, but that's apparently not enough. Maybe I'm missing something, but I see several items in Patchwork for this single thread when, IIUC, it should only be one. Thank you.
Hi Luc, On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote: > The creation of control threads uses a pthread barrier for > synchronization. This patch fixes a race condition where the pthread > barrier could get destroyed while one of the threads has not yet > returned from the pthread_barrier_wait function, which could result in > undefined behaviour. > > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation") > Cc: jianfeng.tan@intel.com > Cc: stable@dpdk.org > > Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com> > --- > > Same as v4 except that I fixed 2 minor style issues flagged by patchwork. > > lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------ > 1 file changed, 25 insertions(+), 27 deletions(-) > > diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c > index 73a055902..c1044e795 100644 > --- a/lib/librte_eal/common/eal_common_thread.c > +++ b/lib/librte_eal/common/eal_common_thread.c > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params { > void *(*start_routine)(void *); > void *arg; > pthread_barrier_t configured; > + unsigned int refcnt; > }; > > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) > +{ > + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) { > + pthread_barrier_destroy(&params->configured); > + free(params); > + } > +} > + > static void *ctrl_thread_init(void *arg) > { > - int ret; > struct internal_config *internal_conf = > eal_get_internal_configuration(); > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; > @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg) > > __rte_thread_init(rte_lcore_id(), cpuset); > > - ret = pthread_barrier_wait(&params->configured); > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > - pthread_barrier_destroy(&params->configured); > - free(params); > - } > + pthread_barrier_wait(&params->configured); > + ctrl_params_free(params); > > return start_routine(routine_arg); > } > @@ -210,14 
+215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, > > params->start_routine = start_routine; > params->arg = arg; > + params->refcnt = 2; > > - pthread_barrier_init(&params->configured, NULL, 2); > + ret = pthread_barrier_init(&params->configured, NULL, 2); > + if (ret != 0) > + goto fail_no_barrier; > > ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); > - if (ret != 0) { > - free(params); > - return -ret; > - } > + if (ret != 0) > + goto fail_with_barrier; > > if (name != NULL) { > ret = rte_thread_setname(*thread, name); > @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, > } > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > - if (ret) > - goto fail; > + pthread_barrier_wait(&params->configured); > + ctrl_params_free(params); > > - ret = pthread_barrier_wait(&params->configured); > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > - pthread_barrier_destroy(&params->configured); > - free(params); > - } > + return -ret; I think not killing the thread when pthread_setaffinity_np() returns an error is not very understandable from the API user point of view. What about doing this on top of your patch? The idea is to set start_routine to NULL before the barrier if pthread_setaffinity_np() failed. So there is no need to cancel the thread, it will exit by itself. 
@@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg) eal_get_internal_configuration(); rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; struct rte_thread_ctrl_params *params = arg; - void *(*start_routine)(void *) = params->start_routine; + void *(*start_routine)(void *); void *routine_arg = params->arg; __rte_thread_init(rte_lcore_id(), cpuset); pthread_barrier_wait(&params->configured); + start_routine = params->start_routine; ctrl_params_free(params); + if (start_routine == NULL) + return NULL; + return start_routine(routine_arg); } @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, } ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); + if (ret != 0) + params->start_routine = NULL; + pthread_barrier_wait(&params->configured); ctrl_params_free(params); - return -ret; + if (ret != 0) { + pthread_join(*thread, NULL); + return -ret; + } + + return 0; fail_with_barrier: pthread_barrier_destroy(&params->configured); Regards, Olivier
<snip> > > Hi Luc, > > On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote: > > The creation of control threads uses a pthread barrier for > > synchronization. This patch fixes a race condition where the pthread > > barrier could get destroyed while one of the threads has not yet > > returned from the pthread_barrier_wait function, which could result in > > undefined behaviour. > > > > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread > > creation") > > Cc: jianfeng.tan@intel.com > > Cc: stable@dpdk.org > > > > Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com> > > --- > > > > Same as v4 except that I fixed 2 minor style issues flagged by patchwork. > > > > lib/librte_eal/common/eal_common_thread.c | 52 > > +++++++++++------------ > > 1 file changed, 25 insertions(+), 27 deletions(-) > > > > diff --git a/lib/librte_eal/common/eal_common_thread.c > > b/lib/librte_eal/common/eal_common_thread.c > > index 73a055902..c1044e795 100644 > > --- a/lib/librte_eal/common/eal_common_thread.c > > +++ b/lib/librte_eal/common/eal_common_thread.c > > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params { > > void *(*start_routine)(void *); > > void *arg; > > pthread_barrier_t configured; > > + unsigned int refcnt; > > }; > > > > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) { > > + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == > 0) { > > + pthread_barrier_destroy(&params->configured); > > + free(params); > > + } > > +} > > + > > static void *ctrl_thread_init(void *arg) { > > - int ret; > > struct internal_config *internal_conf = > > eal_get_internal_configuration(); > > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11 > > +192,8 @@ static void *ctrl_thread_init(void *arg) > > > > __rte_thread_init(rte_lcore_id(), cpuset); > > > > - ret = pthread_barrier_wait(&params->configured); > > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > > - pthread_barrier_destroy(&params->configured); > > - free(params); > > - } > > + 
pthread_barrier_wait(&params->configured); > > + ctrl_params_free(params); > > > > return start_routine(routine_arg); > > } > > @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const > > char *name, > > > > params->start_routine = start_routine; > > params->arg = arg; > > + params->refcnt = 2; > > > > - pthread_barrier_init(&params->configured, NULL, 2); > > + ret = pthread_barrier_init(&params->configured, NULL, 2); > > + if (ret != 0) > > + goto fail_no_barrier; > > > > ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); > > - if (ret != 0) { > > - free(params); > > - return -ret; > > - } > > + if (ret != 0) > > + goto fail_with_barrier; > > > > if (name != NULL) { > > ret = rte_thread_setname(*thread, name); @@ -227,25 > +233,17 @@ > > rte_ctrl_thread_create(pthread_t *thread, const char *name, > > } > > > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > > - if (ret) > > - goto fail; > > + pthread_barrier_wait(&params->configured); > > + ctrl_params_free(params); > > > > - ret = pthread_barrier_wait(&params->configured); > > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > > - pthread_barrier_destroy(&params->configured); > > - free(params); > > - } > > + return -ret; > > I think not killing the thread when pthread_setaffinity_np() returns an error is > not very understandable from the API user point of view. Agree. > > What about doing this on top of your patch? The idea is to set start_routine > to NULL before the barrier if pthread_setaffinity_np() failed. So there is no > need to cancel the thread, it will exit by itself. How about using the pthread_attr_setaffinity_np API? It is deviating from the documentation of the 'rte_ctrl_thread_create'. But, from the user perspective, the behavior should not change. This way we do not have to handle the error after the thread is launched. 
> > @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg) > eal_get_internal_configuration(); > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; > struct rte_thread_ctrl_params *params = arg; > - void *(*start_routine)(void *) = params->start_routine; > + void *(*start_routine)(void *); > void *routine_arg = params->arg; > > __rte_thread_init(rte_lcore_id(), cpuset); > > pthread_barrier_wait(&params->configured); > + start_routine = params->start_routine; > ctrl_params_free(params); > > + if (start_routine == NULL) > + return NULL; > + > return start_routine(routine_arg); > } > > @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const > char *name, > } > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > + if (ret != 0) > + params->start_routine = NULL; > + > pthread_barrier_wait(&params->configured); > ctrl_params_free(params); > > - return -ret; > + if (ret != 0) { > + pthread_join(*thread, NULL); > + return -ret; > + } > + > + return 0; > > fail_with_barrier: > pthread_barrier_destroy(&params->configured); > > > Regards, > Olivier
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 73a055902..c1044e795 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params { void *(*start_routine)(void *); void *arg; pthread_barrier_t configured; + unsigned int refcnt; }; +static void ctrl_params_free(struct rte_thread_ctrl_params *params) +{ + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) { + pthread_barrier_destroy(&params->configured); + free(params); + } +} + static void *ctrl_thread_init(void *arg) { - int ret; struct internal_config *internal_conf = eal_get_internal_configuration(); rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg) __rte_thread_init(rte_lcore_id(), cpuset); - ret = pthread_barrier_wait(&params->configured); - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { - pthread_barrier_destroy(&params->configured); - free(params); - } + pthread_barrier_wait(&params->configured); + ctrl_params_free(params); return start_routine(routine_arg); } @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, params->start_routine = start_routine; params->arg = arg; + params->refcnt = 2; - pthread_barrier_init(&params->configured, NULL, 2); + ret = pthread_barrier_init(&params->configured, NULL, 2); + if (ret != 0) + goto fail_no_barrier; ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); - if (ret != 0) { - free(params); - return -ret; - } + if (ret != 0) + goto fail_with_barrier; if (name != NULL) { ret = rte_thread_setname(*thread, name); @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, } ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); - if (ret) - goto fail; + pthread_barrier_wait(&params->configured); + ctrl_params_free(params); - ret = pthread_barrier_wait(&params->configured); - if (ret == 
PTHREAD_BARRIER_SERIAL_THREAD) { - pthread_barrier_destroy(&params->configured); - free(params); - } + return -ret; - return 0; +fail_with_barrier: + pthread_barrier_destroy(&params->configured); + +fail_no_barrier: + free(params); -fail: - if (PTHREAD_BARRIER_SERIAL_THREAD == - pthread_barrier_wait(&params->configured)) { - pthread_barrier_destroy(&params->configured); - free(params); - } - pthread_cancel(*thread); - pthread_join(*thread, NULL); return -ret; }
The creation of control threads uses a pthread barrier for synchronization. This patch fixes a race condition where the pthread barrier could get destroyed while one of the threads has not yet returned from the pthread_barrier_wait function, which could result in undefined behaviour. Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation") Cc: jianfeng.tan@intel.com Cc: stable@dpdk.org Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com> --- Same as v4 except that I fixed 2 minor style issues flagged by patchwork. lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------ 1 file changed, 25 insertions(+), 27 deletions(-)