[dpdk-dev] Change alarm cancel function to thread-safe.

Message ID 1411484549-711-1-git-send-email-michalx.k.jastrzebski@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Michal Jastrzebski Sept. 23, 2014, 3:02 p.m. UTC
It eliminates a race between threads using rte_eal_alarm_cancel() and rte_eal_alarm_set().

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>
---
 lib/librte_eal/common/include/rte_alarm.h |    3 +-
 lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 +++++++++++++++++++----------
 2 files changed, 46 insertions(+), 25 deletions(-)
  

Comments

Ananyev, Konstantin Sept. 24, 2014, 3:18 p.m. UTC | #1
Hi Michal,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Tuesday, September 23, 2014 4:02 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] Change alarm cancel function to thread-safe.
> 
> It eliminates a race between threads using rte_alarm_cancel and rte_alarm_set.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>


The patch looks good, but I think it is incomplete.
In rte_eal_alarm_set(), the 'executing' field of a newly allocated alarm_entry is never reset to zero.
Yes, it seems that the problem is not new and has been there for a while.
Probably the easiest way to fix it:

@@ -150,7 +150,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
        if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
                return -EINVAL;

-       new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
+       new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
        if (new_alarm == NULL)
                return -ENOMEM;

Plus two nits, see below.

Thanks
Konstantin

> ---
>  lib/librte_eal/common/include/rte_alarm.h |    3 +-
>  lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 +++++++++++++++++++----------
>  2 files changed, 46 insertions(+), 25 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
> index d451522..f5f7de4 100644
> --- a/lib/librte_eal/common/include/rte_alarm.h
> +++ b/lib/librte_eal/common/include/rte_alarm.h
> @@ -76,7 +76,8 @@ typedef void (*rte_eal_alarm_callback)(void *arg);
>  int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
> 
>  /**
> - * Function to cancel an alarm callback which has been registered before.
> + * Function to cancel an alarm callback which has been registered before. If
> + * used ouside alarm callback it wait for all callbacks to finish its execution.

s/ouside/outside/


>   *
>   * @param cb_fn
>   *  alarm callback
> diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> index 480f0cb..0561dbf 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> @@ -69,12 +69,14 @@ struct alarm_entry {
>  	struct timeval time;
>  	rte_eal_alarm_callback cb_fn;
>  	void *cb_arg;
> -	volatile int executing;
> +	volatile uint8_t executing;
> +	volatile pthread_t executing_id;
>  };
> 
>  static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
>  static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
> 
> +
>  static struct rte_intr_handle intr_handle = {.fd = -1 };
>  static int handler_registered = 0;
>  static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
> @@ -108,11 +110,14 @@ eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
>  			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
>  						ap->time.tv_usec <= now.tv_usec))){
>  		ap->executing = 1;
> +		ap->executing_id = pthread_self();
>  		rte_spinlock_unlock(&alarm_list_lk);
> 
>  		ap->cb_fn(ap->cb_arg);
> 
>  		rte_spinlock_lock(&alarm_list_lk);
> +		ap->executing = 0;
> +

I don't think you need "ap->executing = 0;" here: ap is going to be freed right after anyway.

>  		LIST_REMOVE(ap, next);
>  		rte_free(ap);
>  	}
> @@ -156,7 +161,6 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
>  	new_alarm->cb_arg = cb_arg;
>  	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
>  	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
> -	new_alarm->executing = 0;
> 
>  	rte_spinlock_lock(&alarm_list_lk);
>  	if (!handler_registered) {
> @@ -202,34 +206,50 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
>  {
>  	struct alarm_entry *ap, *ap_prev;
>  	int count = 0;
> +	int executing;
> 
>  	if (!cb_fn)
>  		return -1;
> 
> -	rte_spinlock_lock(&alarm_list_lk);
> -	/* remove any matches at the start of the list */
> -	while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
> -			cb_fn == ap->cb_fn && ap->executing == 0 &&
> -			(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> -		LIST_REMOVE(ap, next);
> -		rte_free(ap);
> -		count++;
> -	}
> -	ap_prev = ap;
> -
> -	/* now go through list, removing entries not at start */
> -	LIST_FOREACH(ap, &alarm_list, next) {
> -		/* this won't be true first time through */
> -		if (cb_fn == ap->cb_fn &&  ap->executing == 0 &&
> +	do {
> +		executing = 0;
> +		rte_spinlock_lock(&alarm_list_lk);
> +		/* remove any matches at the start of the list */
> +		while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
> +				cb_fn == ap->cb_fn &&
>  				(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> -			LIST_REMOVE(ap,next);
> -			rte_free(ap);
> -			count++;
> -			ap = ap_prev;
> +
> +			if (ap->executing == 0) {
> +				LIST_REMOVE(ap, next);
> +				rte_free(ap);
> +				count++;
> +			} else {
> +				if (pthread_equal(ap->executing_id, pthread_self()) == 0)
> +					executing++;
> +
> +				break;
> +			}
>  		}
>  		ap_prev = ap;
> -	}
> -	rte_spinlock_unlock(&alarm_list_lk);
> +
> +		/* now go through list, removing entries not at start */
> +		LIST_FOREACH(ap, &alarm_list, next) {
> +			/* this won't be true first time through */
> +			if (cb_fn == ap->cb_fn &&
> +					(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> +
> +				if (ap->executing == 0) {
> +					LIST_REMOVE(ap, next);
> +					rte_free(ap);
> +					count++;
> +					ap = ap_prev;
> +				} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
> +					executing++;
> +			}
> +			ap_prev = ap;
> +		}
> +		rte_spinlock_unlock(&alarm_list_lk);
> +	} while (executing != 0);
> +
>  	return count;
>  }
> -
> --
> 1.7.9.5
  

Patch

diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
index d451522..f5f7de4 100644
--- a/lib/librte_eal/common/include/rte_alarm.h
+++ b/lib/librte_eal/common/include/rte_alarm.h
@@ -76,7 +76,8 @@  typedef void (*rte_eal_alarm_callback)(void *arg);
 int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
 
 /**
- * Function to cancel an alarm callback which has been registered before.
+ * Function to cancel an alarm callback which has been registered before. If
+ * used outside alarm callback it waits for all callbacks to finish execution.
  *
  * @param cb_fn
  *  alarm callback
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index 480f0cb..0561dbf 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -69,12 +69,14 @@  struct alarm_entry {
 	struct timeval time;
 	rte_eal_alarm_callback cb_fn;
 	void *cb_arg;
-	volatile int executing;
+	volatile uint8_t executing;
+	volatile pthread_t executing_id;
 };
 
 static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
 static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
 
+
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static int handler_registered = 0;
 static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
@@ -108,11 +110,14 @@  eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
 			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
 						ap->time.tv_usec <= now.tv_usec))){
 		ap->executing = 1;
+		ap->executing_id = pthread_self();
 		rte_spinlock_unlock(&alarm_list_lk);
 
 		ap->cb_fn(ap->cb_arg);
 
 		rte_spinlock_lock(&alarm_list_lk);
+		ap->executing = 0;
+
 		LIST_REMOVE(ap, next);
 		rte_free(ap);
 	}
@@ -156,7 +161,6 @@  rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
 	new_alarm->cb_arg = cb_arg;
 	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
 	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
-	new_alarm->executing = 0;
 
 	rte_spinlock_lock(&alarm_list_lk);
 	if (!handler_registered) {
@@ -202,34 +206,50 @@  rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
 {
 	struct alarm_entry *ap, *ap_prev;
 	int count = 0;
+	int executing;
 
 	if (!cb_fn)
 		return -1;
 
-	rte_spinlock_lock(&alarm_list_lk);
-	/* remove any matches at the start of the list */
-	while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
-			cb_fn == ap->cb_fn && ap->executing == 0 &&
-			(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
-		LIST_REMOVE(ap, next);
-		rte_free(ap);
-		count++;
-	}
-	ap_prev = ap;
-
-	/* now go through list, removing entries not at start */
-	LIST_FOREACH(ap, &alarm_list, next) {
-		/* this won't be true first time through */
-		if (cb_fn == ap->cb_fn &&  ap->executing == 0 &&
+	do {
+		executing = 0;
+		rte_spinlock_lock(&alarm_list_lk);
+		/* remove any matches at the start of the list */
+		while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+				cb_fn == ap->cb_fn &&
 				(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
-			LIST_REMOVE(ap,next);
-			rte_free(ap);
-			count++;
-			ap = ap_prev;
+
+			if (ap->executing == 0) {
+				LIST_REMOVE(ap, next);
+				rte_free(ap);
+				count++;
+			} else {
+				if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+
+				break;
+			}
 		}
 		ap_prev = ap;
-	}
-	rte_spinlock_unlock(&alarm_list_lk);
+
+		/* now go through list, removing entries not at start */
+		LIST_FOREACH(ap, &alarm_list, next) {
+			/* this won't be true first time through */
+			if (cb_fn == ap->cb_fn &&
+					(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+				if (ap->executing == 0) {
+					LIST_REMOVE(ap, next);
+					rte_free(ap);
+					count++;
+					ap = ap_prev;
+				} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+			}
+			ap_prev = ap;
+		}
+		rte_spinlock_unlock(&alarm_list_lk);
+	} while (executing != 0);
+
 	return count;
 }
-