[v5] eal: fix race in ctrl thread creation

Message ID 20210407125322.124327-1-lucp.at.work@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v5] eal: fix race in ctrl thread creation |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success travis build: passed
ci/github-robot success github build: passed
ci/Intel-compilation success Compilation OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/intel-Testing success Testing PASS

Commit Message

Luc Pelletier April 7, 2021, 12:53 p.m. UTC
  The creation of control threads uses a pthread barrier for
synchronization. This patch fixes a race condition where the pthread
barrier could get destroyed while one of the threads has not yet
returned from the pthread_barrier_wait function, which could result in
undefined behaviour.

Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
Cc: jianfeng.tan@intel.com
Cc: stable@dpdk.org

Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
---

Same as v4 except that I fixed 2 minor style issues flagged by patchwork.

 lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------
 1 file changed, 25 insertions(+), 27 deletions(-)
  

Comments

Luc Pelletier April 7, 2021, 1:22 p.m. UTC | #1
Not directly related to this patch, but can someone please explain why
Patchwork is creating a new series every time I post a new version of
the patch to this thread? I must be doing something wrong, but I don't
know what it is. I have been using --in-reply-to with git send-email,
but that's apparently not enough. Maybe I'm missing something, but I
see several items in Patchwork for this single thread when, IIUC, there
should only be one.

Thank you.
  
Olivier Matz April 7, 2021, 1:31 p.m. UTC | #2
Hi Luc,

On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote:
> The creation of control threads uses a pthread barrier for
> synchronization. This patch fixes a race condition where the pthread
> barrier could get destroyed while one of the threads has not yet
> returned from the pthread_barrier_wait function, which could result in
> undefined behaviour.
> 
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> ---
> 
> Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
> 
>  lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------
>  1 file changed, 25 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902..c1044e795 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
>  	void *(*start_routine)(void *);
>  	void *arg;
>  	pthread_barrier_t configured;
> +	unsigned int refcnt;
>  };
>  
> +static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> +{
> +	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
> +		pthread_barrier_destroy(&params->configured);
> +		free(params);
> +	}
> +}
> +
>  static void *ctrl_thread_init(void *arg)
>  {
> -	int ret;
>  	struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
>  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg)
>  
>  	__rte_thread_init(rte_lcore_id(), cpuset);
>  
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -		pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> +	pthread_barrier_wait(&params->configured);
> +	ctrl_params_free(params);
>  
>  	return start_routine(routine_arg);
>  }
> @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  
>  	params->start_routine = start_routine;
>  	params->arg = arg;
> +	params->refcnt = 2;
>  
> -	pthread_barrier_init(&params->configured, NULL, 2);
> +	ret = pthread_barrier_init(&params->configured, NULL, 2);
> +	if (ret != 0)
> +		goto fail_no_barrier;
>  
>  	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> -	if (ret != 0) {
> -		free(params);
> -		return -ret;
> -	}
> +	if (ret != 0)
> +		goto fail_with_barrier;
>  
>  	if (name != NULL) {
>  		ret = rte_thread_setname(*thread, name);
> @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  	}
>  
>  	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> -	if (ret)
> -		goto fail;
> +	pthread_barrier_wait(&params->configured);
> +	ctrl_params_free(params);
>  
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -		pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> +	return -ret;

I think that not killing the thread when pthread_setaffinity_np() returns
an error is not very understandable from the API user's point of view.

What about doing this on top of your patch? The idea is to set
start_routine to NULL before the barrier if pthread_setaffinity_np()
fails. That way, there is no need to cancel the thread; it will exit by
itself.

  @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg)
                  eal_get_internal_configuration();
          rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
          struct rte_thread_ctrl_params *params = arg;
  -       void *(*start_routine)(void *) = params->start_routine;
  +       void *(*start_routine)(void *);
          void *routine_arg = params->arg;
   
          __rte_thread_init(rte_lcore_id(), cpuset);
   
          pthread_barrier_wait(&params->configured);
  +       start_routine = params->start_routine;
          ctrl_params_free(params);
   
  +       if (start_routine == NULL)
  +               return NULL;
  +
          return start_routine(routine_arg);
   }
   
  @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
          }
   
          ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
  +       if (ret != 0)
  +               params->start_routine = NULL;
  +
          pthread_barrier_wait(&params->configured);
          ctrl_params_free(params);
   
  -       return -ret;
  +       if (ret != 0) {
  +               pthread_join(*thread, NULL);
  +               return -ret;
  +       }
  +
  +       return 0;
   
   fail_with_barrier:
          pthread_barrier_destroy(&params->configured);


Regards,
Olivier
  
Honnappa Nagarahalli April 7, 2021, 3:15 p.m. UTC | #3
<snip>

> 
> Hi Luc,
> 
> On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote:
> > The creation of control threads uses a pthread barrier for
> > synchronization. This patch fixes a race condition where the pthread
> > barrier could get destroyed while one of the threads has not yet
> > returned from the pthread_barrier_wait function, which could result in
> > undefined behaviour.
> >
> > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread
> > creation")
> > Cc: jianfeng.tan@intel.com
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Luc Pelletier <lucp.at.work@gmail.com>
> > ---
> >
> > Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
> >
> >  lib/librte_eal/common/eal_common_thread.c | 52
> > +++++++++++------------
> >  1 file changed, 25 insertions(+), 27 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/eal_common_thread.c
> > b/lib/librte_eal/common/eal_common_thread.c
> > index 73a055902..c1044e795 100644
> > --- a/lib/librte_eal/common/eal_common_thread.c
> > +++ b/lib/librte_eal/common/eal_common_thread.c
> > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
> >  	void *(*start_routine)(void *);
> >  	void *arg;
> >  	pthread_barrier_t configured;
> > +	unsigned int refcnt;
> >  };
> >
> > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) {
> > +	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0) {
> > +		pthread_barrier_destroy(&params->configured);
> > +		free(params);
> > +	}
> > +}
> > +
> >  static void *ctrl_thread_init(void *arg)  {
> > -	int ret;
> >  	struct internal_config *internal_conf =
> >  		eal_get_internal_configuration();
> >  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11
> > +192,8 @@ static void *ctrl_thread_init(void *arg)
> >
> >  	__rte_thread_init(rte_lcore_id(), cpuset);
> >
> > -	ret = pthread_barrier_wait(&params->configured);
> > -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> > -		pthread_barrier_destroy(&params->configured);
> > -		free(params);
> > -	}
> > +	pthread_barrier_wait(&params->configured);
> > +	ctrl_params_free(params);
> >
> >  	return start_routine(routine_arg);
> >  }
> > @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const
> > char *name,
> >
> >  	params->start_routine = start_routine;
> >  	params->arg = arg;
> > +	params->refcnt = 2;
> >
> > -	pthread_barrier_init(&params->configured, NULL, 2);
> > +	ret = pthread_barrier_init(&params->configured, NULL, 2);
> > +	if (ret != 0)
> > +		goto fail_no_barrier;
> >
> >  	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> > -	if (ret != 0) {
> > -		free(params);
> > -		return -ret;
> > -	}
> > +	if (ret != 0)
> > +		goto fail_with_barrier;
> >
> >  	if (name != NULL) {
> >  		ret = rte_thread_setname(*thread, name); @@ -227,25
> +233,17 @@
> > rte_ctrl_thread_create(pthread_t *thread, const char *name,
> >  	}
> >
> >  	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> > -	if (ret)
> > -		goto fail;
> > +	pthread_barrier_wait(&params->configured);
> > +	ctrl_params_free(params);
> >
> > -	ret = pthread_barrier_wait(&params->configured);
> > -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> > -		pthread_barrier_destroy(&params->configured);
> > -		free(params);
> > -	}
> > +	return -ret;
> 
> I think not killing the thread when pthread_setaffinity_np() returns an error is
> not very understandable from the API user point of view.
Agree.

> 
> What about doing this on top of your patch? The idea is to set start_routine
> to NULL before the barrier if pthread_setaffinity_np() failed. So there is no
> need to cancel the thread, it will exit by itself.
How about using the pthread_attr_setaffinity_np API?
This deviates from the documentation of 'rte_ctrl_thread_create', but from the user's perspective the behavior should not change.
This way, we do not have to handle the error after the thread is launched.

> 
>   @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg)
>                   eal_get_internal_configuration();
>           rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
>           struct rte_thread_ctrl_params *params = arg;
>   -       void *(*start_routine)(void *) = params->start_routine;
>   +       void *(*start_routine)(void *);
>           void *routine_arg = params->arg;
> 
>           __rte_thread_init(rte_lcore_id(), cpuset);
> 
>           pthread_barrier_wait(&params->configured);
>   +       start_routine = params->start_routine;
>           ctrl_params_free(params);
> 
>   +       if (start_routine == NULL)
>   +               return NULL;
>   +
>           return start_routine(routine_arg);
>    }
> 
>   @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
>           }
> 
>           ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
>   +       if (ret != 0)
>   +               params->start_routine = NULL;
>   +
>           pthread_barrier_wait(&params->configured);
>           ctrl_params_free(params);
> 
>   -       return -ret;
>   +       if (ret != 0) {
>   +               pthread_join(*thread, NULL);
>   +               return -ret;
>   +       }
>   +
>   +       return 0;
> 
>    fail_with_barrier:
>           pthread_barrier_destroy(&params->configured);
> 
> 
> Regards,
> Olivier
  

Patch

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 73a055902..c1044e795 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -170,11 +170,19 @@  struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
 	pthread_barrier_t configured;
+	unsigned int refcnt;
 };
 
+static void ctrl_params_free(struct rte_thread_ctrl_params *params)
+{
+	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
+		pthread_barrier_destroy(&params->configured);
+		free(params);
+	}
+}
+
 static void *ctrl_thread_init(void *arg)
 {
-	int ret;
 	struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
@@ -184,11 +192,8 @@  static void *ctrl_thread_init(void *arg)
 
 	__rte_thread_init(rte_lcore_id(), cpuset);
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
-		pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
+	pthread_barrier_wait(&params->configured);
+	ctrl_params_free(params);
 
 	return start_routine(routine_arg);
 }
@@ -210,14 +215,15 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	params->start_routine = start_routine;
 	params->arg = arg;
+	params->refcnt = 2;
 
-	pthread_barrier_init(&params->configured, NULL, 2);
+	ret = pthread_barrier_init(&params->configured, NULL, 2);
+	if (ret != 0)
+		goto fail_no_barrier;
 
 	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
-	if (ret != 0) {
-		free(params);
-		return -ret;
-	}
+	if (ret != 0)
+		goto fail_with_barrier;
 
 	if (name != NULL) {
 		ret = rte_thread_setname(*thread, name);
@@ -227,25 +233,17 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	}
 
 	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
-	if (ret)
-		goto fail;
+	pthread_barrier_wait(&params->configured);
+	ctrl_params_free(params);
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
-		pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
+	return -ret;
 
-	return 0;
+fail_with_barrier:
+	pthread_barrier_destroy(&params->configured);
+
+fail_no_barrier:
+	free(params);
 
-fail:
-	if (PTHREAD_BARRIER_SERIAL_THREAD ==
-	    pthread_barrier_wait(&params->configured)) {
-		pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
-	pthread_cancel(*thread);
-	pthread_join(*thread, NULL);
 	return -ret;
 }