[v4,2/3] lib/lpm: memory orderings to avoid race conditions for v20

Message ID 20190703054441.30162-2-ruifeng.wang@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: [v4,1/3] lib/lpm: memory orderings to avoid race conditions for v1604

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Ruifeng Wang July 3, 2019, 5:44 a.m. UTC
While a tbl8 group is being attached to a tbl24 entry, a lookup
might fail even though the entry is configured in the table.

For example, consider an LPM table configured with 10.10.10.1/24.
When a new entry 10.10.10.32/28 is added, a new tbl8 group is
allocated and the tbl24 entry is changed to point to the tbl8
group. If the tbl24 entry is written before the tbl8 group
entries are updated, a lookup on 10.10.10.9 will return failure.

Correct memory orderings are required to ensure that the store
to tbl24 does not happen before the stores to the tbl8 group
entries complete.

In addition, explicit structure alignment is used to address an
atomic-operation build issue with older versions of clang.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
v4: changed alignment attribute parameter
v3: no changes
v2: fixed clang build issue by supplying an alignment attribute.

 lib/librte_lpm/rte_lpm.c | 32 +++++++++++++++++++++++++-------
 lib/librte_lpm/rte_lpm.h |  4 ++--
 2 files changed, 27 insertions(+), 9 deletions(-)
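
The fix follows the classic publish-with-release idiom: fully initialize
the new tbl8 group, then make the tbl24 entry visible with a release
store. Below is a minimal sketch of that pattern using the GCC/clang
__atomic builtins; the entry layout is simplified and
publish_tbl8_group() is a hypothetical helper, not DPDK API.

#include <stdint.h>

/* Simplified 2-byte table entry; the real layout is in rte_lpm.h.
 * The explicit alignment lets the compiler lower __atomic_store to
 * a single lock-free 16-bit store.
 */
struct tbl_entry {
	uint8_t next_hop;
	uint8_t valid       :1;
	uint8_t valid_group :1;
	uint8_t depth       :6;
} __attribute__((aligned(sizeof(uint16_t))));

/* Writer: fill every entry of the new tbl8 group first, then publish
 * the tbl24 entry. The release store orders the tbl8 stores before
 * the tbl24 update, so a reader that observes the new tbl24 entry
 * also observes a fully initialized group.
 */
static void
publish_tbl8_group(struct tbl_entry *tbl24_slot, struct tbl_entry *tbl8_group,
		unsigned int n, struct tbl_entry new_tbl24)
{
	for (unsigned int i = 0; i < n; i++)
		tbl8_group[i] = (struct tbl_entry){ .next_hop = 1, .valid = 1 };

	__atomic_store(tbl24_slot, &new_tbl24, __ATOMIC_RELEASE);
}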
  

Comments

Vladimir Medvedkin July 5, 2019, 4:52 p.m. UTC | #1
Hi Wang,

On 03/07/2019 06:44, Ruifeng Wang wrote:
> [...]
>
> diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
> index 6ec450a08..baa6e7460 100644
> --- a/lib/librte_lpm/rte_lpm.c
> +++ b/lib/librte_lpm/rte_lpm.c
> @@ -737,7 +737,8 @@ add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
>   			/* Setting tbl24 entry in one go to avoid race
>   			 * conditions
>   			 */
> -			lpm->tbl24[i] = new_tbl24_entry;
> +			__atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
> +					__ATOMIC_RELEASE);
>   
>   			continue;
>   		}
> @@ -892,7 +893,8 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   			.depth = 0,
>   		};
>   
> -		lpm->tbl24[tbl24_index] = new_tbl24_entry;
> +		__atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
> +				__ATOMIC_RELEASE);
>   
>   	} /* If valid entry but not extended calculate the index into Table8. */
>   	else if (lpm->tbl24[tbl24_index].valid_group == 0) {
> @@ -938,7 +940,8 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   				.depth = 0,
>   		};
>   
> -		lpm->tbl24[tbl24_index] = new_tbl24_entry;
> +		__atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
> +				__ATOMIC_RELEASE);
>   
>   	} else { /*
>   		* If it is valid, extended entry calculate the index into tbl8.
> @@ -1320,7 +1323,15 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   
>   			if (lpm->tbl24[i].valid_group == 0 &&
>   					lpm->tbl24[i].depth <= depth) {
> -				lpm->tbl24[i].valid = INVALID;
> +				struct rte_lpm_tbl_entry_v20
> +					zero_tbl24_entry = {
> +						.valid = INVALID,
> +						.depth = 0,
> +						.valid_group = 0,
> +					};
> +					zero_tbl24_entry.next_hop = 0;
Why don't you use just "struct rte_lpm_tbl_entry_v20 zero_tbl24_entry =
{0}" like you do for _v1604?
> +				__atomic_store(&lpm->tbl24[i],
> +					&zero_tbl24_entry, __ATOMIC_RELEASE);
>   			} else if (lpm->tbl24[i].valid_group == 1) {
>   				/*
>   				 * If TBL24 entry is extended, then there has
> @@ -1365,7 +1376,8 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   
>   			if (lpm->tbl24[i].valid_group == 0 &&
>   					lpm->tbl24[i].depth <= depth) {
> -				lpm->tbl24[i] = new_tbl24_entry;
> +				__atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
> +						__ATOMIC_RELEASE);
>   			} else  if (lpm->tbl24[i].valid_group == 1) {
>   				/*
>   				 * If TBL24 entry is extended, then there has
> @@ -1647,8 +1659,11 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   	tbl8_recycle_index = tbl8_recycle_check_v20(lpm->tbl8, tbl8_group_start);
>   
>   	if (tbl8_recycle_index == -EINVAL) {
> -		/* Set tbl24 before freeing tbl8 to avoid race condition. */
> +		/* Set tbl24 before freeing tbl8 to avoid race condition.
> +		 * Prevent the free of the tbl8 group from hoisting.
> +		 */
>   		lpm->tbl24[tbl24_index].valid = 0;
> +		__atomic_thread_fence(__ATOMIC_RELEASE);
>   		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
>   	} else if (tbl8_recycle_index > -1) {
>   		/* Update tbl24 entry. */
> @@ -1659,8 +1674,11 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   			.depth = lpm->tbl8[tbl8_recycle_index].depth,
>   		};
>   
> -		/* Set tbl24 before freeing tbl8 to avoid race condition. */
> +		/* Set tbl24 before freeing tbl8 to avoid race condition.
> +		 * Prevent the free of the tbl8 group from hoisting.
> +		 */
>   		lpm->tbl24[tbl24_index] = new_tbl24_entry;
> +		__atomic_thread_fence(__ATOMIC_RELEASE);
>   		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
>   	}
>   
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 6f5704c5c..906ec4483 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -88,7 +88,7 @@ struct rte_lpm_tbl_entry_v20 {
>   	 */
>   	uint8_t valid_group :1;
>   	uint8_t depth       :6; /**< Rule depth. */
> -};
> +} __rte_aligned(sizeof(uint16_t));
>   
>   __extension__
>   struct rte_lpm_tbl_entry {
> @@ -121,7 +121,7 @@ struct rte_lpm_tbl_entry_v20 {
>   		uint8_t group_idx;
>   		uint8_t next_hop;
>   	};
> -};
> +} __rte_aligned(sizeof(uint16_t));
>   
>   __extension__
>   struct rte_lpm_tbl_entry {
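
On the __rte_aligned(sizeof(uint16_t)) change itself: a struct made only
of uint8_t members has size 2 but natural alignment 1, and a 2-byte
__atomic_store on a possibly under-aligned object cannot be lowered to a
single lock-free 16-bit store; older clang then falls back to an
out-of-line libatomic call, which breaks the build. A hedged
illustration of the difference (assumed behavior inferred from the
changelog, not the exact diagnostic):

#include <stdint.h>

struct entry_natural {		/* size 2, alignment 1 */
	uint8_t next_hop;
	uint8_t flags;
};

struct entry_forced {		/* size 2, alignment 2 */
	uint8_t next_hop;
	uint8_t flags;
} __attribute__((aligned(sizeof(uint16_t))));

_Static_assert(_Alignof(struct entry_natural) == 1,
		"all-uint8_t struct is only byte-aligned");
_Static_assert(_Alignof(struct entry_forced) == sizeof(uint16_t),
		"forced alignment matches the object size");

/* With the forced alignment this lowers to one 16-bit store on both
 * gcc and clang; without it, clang may emit a library call instead.
 */
static void
store_entry(struct entry_forced *dst, struct entry_forced *src)
{
	__atomic_store(dst, src, __ATOMIC_RELEASE);
}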
  
Vladimir Medvedkin July 5, 2019, 6:20 p.m. UTC | #2
Hi all,

On Fri, 5 Jul 2019 at 17:52, Medvedkin, Vladimir
<vladimir.medvedkin@intel.com> wrote:

> [...]
> >                       if (lpm->tbl24[i].valid_group == 0 &&
> >                                       lpm->tbl24[i].depth <= depth) {
> > -                             lpm->tbl24[i].valid = INVALID;
> > +                             struct rte_lpm_tbl_entry_v20
> > +                                     zero_tbl24_entry = {
> > +                                             .valid = INVALID,
> > +                                             .depth = 0,
> > +                                             .valid_group = 0,
> > +                                     };
> > +                                     zero_tbl24_entry.next_hop = 0;
> Why don't you use just "struct rte_lpm_tbl_entry_v20 zero_tbl24_entry =
> {0} " like you do for _v1604?
>
Ah, sorry, just found what you mentioned earlier, this will cause meson
build to fail.
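
For reference, the shorthand in question versus the form the patch uses.
The thread does not say why the shorthand fails; one plausible cause, an
assumption rather than a confirmed diagnosis, is a -Wmissing-braces
warning on zero-initializing a struct that starts with an anonymous
union, promoted to an error in the meson build:

#include <stdint.h>

/* Abridged v20 entry layout from rte_lpm.h. */
struct rte_lpm_tbl_entry_v20 {
	union {
		uint8_t next_hop;
		uint8_t group_idx;
	};
	uint8_t valid       :1;
	uint8_t valid_group :1;
	uint8_t depth       :6;
};

/* Shorthand that reportedly breaks the meson build: */
/* struct rte_lpm_tbl_entry_v20 zero_entry = {0}; */

/* Form used in the patch: designated initializers, with the union
 * member assigned separately in the code.
 */
struct rte_lpm_tbl_entry_v20 zero_entry = {
	.valid = 0,
	.depth = 0,
	.valid_group = 0,
};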


Patch

diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 6ec450a08..baa6e7460 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -737,7 +737,8 @@  add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
 			/* Setting tbl24 entry in one go to avoid race
 			 * conditions
 			 */
-			lpm->tbl24[i] = new_tbl24_entry;
+			__atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
+					__ATOMIC_RELEASE);
 
 			continue;
 		}
@@ -892,7 +893,8 @@  add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
 			.depth = 0,
 		};
 
-		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+		__atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
+				__ATOMIC_RELEASE);
 
 	} /* If valid entry but not extended calculate the index into Table8. */
 	else if (lpm->tbl24[tbl24_index].valid_group == 0) {
@@ -938,7 +940,8 @@  add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
 				.depth = 0,
 		};
 
-		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+		__atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
+				__ATOMIC_RELEASE);
 
 	} else { /*
 		* If it is valid, extended entry calculate the index into tbl8.
@@ -1320,7 +1323,15 @@  delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
 
 			if (lpm->tbl24[i].valid_group == 0 &&
 					lpm->tbl24[i].depth <= depth) {
-				lpm->tbl24[i].valid = INVALID;
+				struct rte_lpm_tbl_entry_v20
+					zero_tbl24_entry = {
+						.valid = INVALID,
+						.depth = 0,
+						.valid_group = 0,
+					};
+					zero_tbl24_entry.next_hop = 0;
+				__atomic_store(&lpm->tbl24[i],
+					&zero_tbl24_entry, __ATOMIC_RELEASE);
 			} else if (lpm->tbl24[i].valid_group == 1) {
 				/*
 				 * If TBL24 entry is extended, then there has
@@ -1365,7 +1376,8 @@  delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
 
 			if (lpm->tbl24[i].valid_group == 0 &&
 					lpm->tbl24[i].depth <= depth) {
-				lpm->tbl24[i] = new_tbl24_entry;
+				__atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
+						__ATOMIC_RELEASE);
 			} else  if (lpm->tbl24[i].valid_group == 1) {
 				/*
 				 * If TBL24 entry is extended, then there has
@@ -1647,8 +1659,11 @@  delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
 	tbl8_recycle_index = tbl8_recycle_check_v20(lpm->tbl8, tbl8_group_start);
 
 	if (tbl8_recycle_index == -EINVAL) {
-		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		/* Set tbl24 before freeing tbl8 to avoid race condition.
+		 * Prevent the free of the tbl8 group from hoisting.
+		 */
 		lpm->tbl24[tbl24_index].valid = 0;
+		__atomic_thread_fence(__ATOMIC_RELEASE);
 		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
 	} else if (tbl8_recycle_index > -1) {
 		/* Update tbl24 entry. */
@@ -1659,8 +1674,11 @@  delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
 			.depth = lpm->tbl8[tbl8_recycle_index].depth,
 		};
 
-		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		/* Set tbl24 before freeing tbl8 to avoid race condition.
+		 * Prevent the free of the tbl8 group from hoisting.
+		 */
 		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+		__atomic_thread_fence(__ATOMIC_RELEASE);
 		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
 	}
 
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 6f5704c5c..906ec4483 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -88,7 +88,7 @@  struct rte_lpm_tbl_entry_v20 {
 	 */
 	uint8_t valid_group :1;
 	uint8_t depth       :6; /**< Rule depth. */
-};
+} __rte_aligned(sizeof(uint16_t));
 
 __extension__
 struct rte_lpm_tbl_entry {
@@ -121,7 +121,7 @@  struct rte_lpm_tbl_entry_v20 {
 		uint8_t group_idx;
 		uint8_t next_hop;
 	};
-};
+} __rte_aligned(sizeof(uint16_t));
 
 __extension__
 struct rte_lpm_tbl_entry {
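
The reader side that pairs with these release stores is not shown in
this patch. Conceptually, a lookup must load the tbl24 entry with
acquire semantics so it synchronizes with the writer's release store,
and the release fence above keeps tbl8_free_v20() from being reordered
ahead of the tbl24 invalidation. A sketch of the pairing under those
assumptions, with a simplified entry layout and hypothetical helpers
rather than the real rte_lpm code:

#include <stdint.h>

struct tbl_entry {
	uint8_t next_hop;	/* group index when valid_group is set */
	uint8_t valid       :1;
	uint8_t valid_group :1;
	uint8_t depth       :6;
} __attribute__((aligned(sizeof(uint16_t))));

/* Reader: the acquire load pairs with the writer's release store.
 * If the loaded tbl24 entry points into a tbl8 group, all stores the
 * writer made before publishing it (the group initialization) are
 * guaranteed to be visible here.
 */
static int
lookup_sketch(struct tbl_entry *tbl24, struct tbl_entry *tbl8,
		uint32_t ip, uint8_t *next_hop)
{
	struct tbl_entry e;

	__atomic_load(&tbl24[ip >> 8], &e, __ATOMIC_ACQUIRE);
	if (!e.valid)
		return -1;
	if (e.valid_group) {
		uint32_t idx = (uint32_t)e.next_hop * 256 + (ip & 0xff);
		__atomic_load(&tbl8[idx], &e, __ATOMIC_RELAXED);
		if (!e.valid)
			return -1;
	}
	*next_hop = e.next_hop;
	return 0;
}

/* Deleter: invalidate the tbl24 entry, then free the group. The
 * release fence prevents the free (and its internal stores) from
 * being hoisted above the invalidation, matching the comment added
 * in the patch.
 */
static void
withdraw_tbl8_group(struct tbl_entry *tbl24_slot)
{
	struct tbl_entry inv = { .next_hop = 0 };	/* valid = 0 */

	__atomic_store(tbl24_slot, &inv, __ATOMIC_RELEASE);
	__atomic_thread_fence(__ATOMIC_RELEASE);
	/* tbl8_free(...);  hypothetical stand-in for tbl8_free_v20() */
}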