[v3] eal: fix race in ctrl thread creation

Message ID 20210406161534.103272-1-lucp.at.work@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v3] eal: fix race in ctrl thread creation |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/travis-robot success travis build: passed
ci/github-robot success github build: passed
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS

Commit Message

Luc Pelletier April 6, 2021, 4:15 p.m. UTC
  The creation of control threads used a pthread barrier for
synchronization. This patch fixes a race condition where the pthread
barrier could get destroyed while one of the threads has not yet
returned from the pthread_barrier_wait function, which could result in
undefined behaviour. The barrier has been completely removed in favour
of a reference count on the control thread parameters struct.

Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
Cc: jianfeng.tan@intel.com
Cc: stable@dpdk.org

Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
---

Hi Olivier,
Hi Honnappa,

Thanks for your input Honnappa. I've made the changes to completely
remove the barrier. However, I didn't move the call to
pthread_setaffinity_np to the control thread; I think we want to report
the result of that function to the caller of rte_ctrl_thread_create and
doing so from ctrl_thread_init would be a lot trickier.

Olivier, what do you think of these changes?

 lib/librte_eal/common/eal_common_thread.c | 35 ++++++++---------------
 1 file changed, 12 insertions(+), 23 deletions(-)
  

Comments

Honnappa Nagarahalli April 6, 2021, 9:10 p.m. UTC | #1
<snip>

> 
> The creation of control threads used a pthread barrier for synchronization.
> This patch fixes a race condition where the pthread barrier could get
> destroyed while one of the threads has not yet returned from the
> pthread_barrier_wait function, which could result in undefined behaviour.
> The barrier has been completely removed in favour of a reference count on
> the control thread parameters struct.
> 
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> ---
> 
> Hi Olivier,
> Hi Honnappa,
> 
> Thanks for your input Honnappa. I've made the changes to completely
> remove the barrier. However, I didn't move the call to pthread_setaffinity_np
I looked at the rte_ctrl_thread_create API definition. I am not sure we have much leeway here, as the API definition bakes in calling pthread_setaffinity_np.
With the barrier, the control thread did not execute its function until it was moved to the assigned core. If we remove the barrier and do not move the pthread_setaffinity_np call to ctrl_thread_init, the behavior might change.

One more comment below on how we are handling the setaffinity failure.

> to the control thread; I think we want to report the result of that function to
> the caller of rte_ctrl_thread_create and doing so from ctrl_thread_init would
> be a lot trickier.
> 
> Olivier, what do you think of these changes?
> 
>  lib/librte_eal/common/eal_common_thread.c | 35 ++++++++---------------
>  1 file changed, 12 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_thread.c
> b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902..2421066f9 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -169,12 +169,11 @@ __rte_thread_uninit(void)  struct
> rte_thread_ctrl_params {
>  	void *(*start_routine)(void *);
>  	void *arg;
> -	pthread_barrier_t configured;
> +	unsigned int refcnt;
>  };
> 
>  static void *ctrl_thread_init(void *arg)  {
> -	int ret;
>  	struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
>  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11
> +183,8 @@ static void *ctrl_thread_init(void *arg)
> 
>  	__rte_thread_init(rte_lcore_id(), cpuset);
> 
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -		pthread_barrier_destroy(&params->configured);
> +	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0)
>  		free(params);
> -	}
> 
>  	return start_routine(routine_arg);
>  }
> @@ -210,14 +206,11 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
> 
>  	params->start_routine = start_routine;
>  	params->arg = arg;
> -
> -	pthread_barrier_init(&params->configured, NULL, 2);
> +	params->refcnt = 2;
> 
>  	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> -	if (ret != 0) {
> -		free(params);
> -		return -ret;
> -	}
> +	if (ret != 0)
> +		goto fail;
> 
>  	if (name != NULL) {
>  		ret = rte_thread_setname(*thread, name); @@ -228,24
> +221,20 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
> 
>  	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
>  	if (ret)
> -		goto fail;
> +		goto fail_cancel;
> 
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -		pthread_barrier_destroy(&params->configured);
> +	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0)
>  		free(params);
> -	}
> 
>  	return 0;
> 
> -fail:
> -	if (PTHREAD_BARRIER_SERIAL_THREAD ==
> -	    pthread_barrier_wait(&params->configured)) {
> -		pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> +fail_cancel:
>  	pthread_cancel(*thread);
For the control thread to be able to respond to pthread_cancel, it needs to call one of the library functions that is considered a cancellation point. I do not see such requirements in the API definition.


>  	pthread_join(*thread, NULL);
> +
> +fail:
> +	free(params);
> +
>  	return -ret;
>  }
> 
> --
> 2.25.1
  

Patch

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 73a055902..2421066f9 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -169,12 +169,11 @@  __rte_thread_uninit(void)
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
-	pthread_barrier_t configured;
+	unsigned int refcnt;
 };
 
 static void *ctrl_thread_init(void *arg)
 {
-	int ret;
 	struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
@@ -184,11 +183,8 @@  static void *ctrl_thread_init(void *arg)
 
 	__rte_thread_init(rte_lcore_id(), cpuset);
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
-		pthread_barrier_destroy(&params->configured);
+	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0)
 		free(params);
-	}
 
 	return start_routine(routine_arg);
 }
@@ -210,14 +206,11 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	params->start_routine = start_routine;
 	params->arg = arg;
-
-	pthread_barrier_init(&params->configured, NULL, 2);
+	params->refcnt = 2;
 
 	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
-	if (ret != 0) {
-		free(params);
-		return -ret;
-	}
+	if (ret != 0)
+		goto fail;
 
 	if (name != NULL) {
 		ret = rte_thread_setname(*thread, name);
@@ -228,24 +221,20 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
 	if (ret)
-		goto fail;
+		goto fail_cancel;
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
-		pthread_barrier_destroy(&params->configured);
+	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0)
 		free(params);
-	}
 
 	return 0;
 
-fail:
-	if (PTHREAD_BARRIER_SERIAL_THREAD ==
-	    pthread_barrier_wait(&params->configured)) {
-		pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
+fail_cancel:
 	pthread_cancel(*thread);
 	pthread_join(*thread, NULL);
+
+fail:
+	free(params);
+
 	return -ret;
 }