[v2] graph: avoid accessing graph list when getting stats

Message ID 20240401203647.1909165-3-rjarry@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series [v2] graph: avoid accessing graph list when getting stats |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-sample-apps-testing warning Testing issues
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Robin Jarry April 1, 2024, 8:36 p.m. UTC
In rte_graph_cluster_stats_get, the walk model of the first graph in the
global graph list is checked to determine whether multi-core dispatch
specific counters should be updated. This global list is accessed
without any locks.

If the global list is modified by another thread while
rte_graph_cluster_stats_get is called, it can result in undefined
behaviour.

Adding a lock would make it impossible to call
rte_graph_cluster_stats_get in packet processing code paths. Instead,
avoid accessing the global list by storing a bool field in the private
rte_graph_cluster_stats structure that records whether the multi-core
dispatch model is in use.

Also update the default callback to avoid accessing the global list and
use a different default callback depending on the graph model.

Signed-off-by: Robin Jarry <rjarry@redhat.com>
---

Notes:
    v2:
    
    * (kiran) removed unnecessary loop in stats_mem_init.

 lib/graph/graph_stats.c | 57 ++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 21 deletions(-)
  

Comments

Kiran Kumar Kokkilagadda April 3, 2024, 7:22 a.m. UTC | #1
> -----Original Message-----
> From: Robin Jarry <rjarry@redhat.com>
> Sent: Tuesday, April 2, 2024 2:07 AM
> To: dev@dpdk.org; Jerin Jacob <jerinj@marvell.com>; Kiran Kumar
> Kokkilagadda <kirankumark@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>; Zhirun Yan <yanzhirun_163@163.com>
> Subject: [EXTERNAL] [PATCH v2] graph: avoid accessing graph list when getting
> stats
> 
> Prioritize security for external emails: Confirm sender and content safety
> before clicking links or opening attachments
> 
> ----------------------------------------------------------------------
> In rte_graph_cluster_stats_get, the walk model of the first graph is checked
> to determine if multi-core dispatch specific counters should be updated or
> not. This global list is accessed without any locks.
> 
> If the global list is modified by another thread while
> rte_graph_cluster_stats_get is called, it can result in undefined behaviour.
> 
> Adding a lock would make it impossible to call rte_graph_cluster_stats_get in
> packet processing code paths. Avoid accessing the global list instead by
> storing a bool field in the private rte_graph_cluster_stats structure.
> 
> Also update the default callback to avoid accessing the global list and use a
> different default callback depending on the graph model.
> 
> Signed-off-by: Robin Jarry <rjarry@redhat.com>
> ---

Acked-by: Kiran Kumar Kokkilagadda <kirankumark@marvell.com>


> 
> Notes:
>     v2:
> 
>     * (kiran) removed unnecessary loop in stats_mem_init.
> 
>  lib/graph/graph_stats.c | 57 ++++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 21 deletions(-)
> 
> diff --git a/lib/graph/graph_stats.c b/lib/graph/graph_stats.c
> index 2fb808b21ec5..d71451a17b95 100644
> --- a/lib/graph/graph_stats.c
> +++ b/lib/graph/graph_stats.c
> @@ -34,6 +34,7 @@ struct __rte_cache_aligned rte_graph_cluster_stats {
>  	uint32_t cluster_node_size; /* Size of struct cluster_node */
>  	rte_node_t max_nodes;
>  	int socket_id;
> +	bool dispatch;
>  	void *cookie;
>  	size_t sz;
> 
> @@ -74,17 +75,16 @@ print_banner_dispatch(FILE *f)  }
> 
>  static inline void
> -print_banner(FILE *f)
> +print_banner(FILE *f, bool dispatch)
>  {
> -	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
> -	    RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +	if (dispatch)
>  		print_banner_dispatch(f);
>  	else
>  		print_banner_default(f);
>  }
> 
>  static inline void
> -print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
> +print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat,
> +bool dispatch)
>  {
>  	double objs_per_call, objs_per_sec, cycles_per_call, ts_per_hz;
>  	const uint64_t prev_calls = stat->prev_calls; @@ -104,8 +104,7 @@
> print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
>  	objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
>  	objs_per_sec /= 1000000;
> 
> -	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
> -	    RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +	if (dispatch) {
>  		fprintf(f,
>  			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
>  			"|%-15" PRIu64 "|%-15" PRIu64
> @@ -123,20 +122,17 @@ print_node(FILE *f, const struct
> rte_graph_cluster_node_stats *stat)  }
> 
>  static int
> -graph_cluster_stats_cb(bool is_first, bool is_last, void *cookie,
> +graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void
> +*cookie,
>  		       const struct rte_graph_cluster_node_stats *stat)  {
>  	FILE *f = cookie;
> -	int model;
> -
> -	model =
> rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
> 
>  	if (unlikely(is_first))
> -		print_banner(f);
> +		print_banner(f, dispatch);
>  	if (stat->objs)
> -		print_node(f, stat);
> +		print_node(f, stat, dispatch);
>  	if (unlikely(is_last)) {
> -		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +		if (dispatch)
>  			boarder_model_dispatch();
>  		else
>  			boarder();
> @@ -145,6 +141,20 @@ graph_cluster_stats_cb(bool is_first, bool is_last,
> void *cookie,
>  	return 0;
>  };
> 
> +static int
> +graph_cluster_stats_cb_rtc(bool is_first, bool is_last, void *cookie,
> +			   const struct rte_graph_cluster_node_stats *stat) {
> +	return graph_cluster_stats_cb(false, is_first, is_last, cookie, stat);
> +};
> +
> +static int
> +graph_cluster_stats_cb_dispatch(bool is_first, bool is_last, void *cookie,
> +				const struct rte_graph_cluster_node_stats
> *stat) {
> +	return graph_cluster_stats_cb(true, is_first, is_last, cookie, stat);
> +};
> +
>  static struct rte_graph_cluster_stats *  stats_mem_init(struct cluster *cluster,
>  	       const struct rte_graph_cluster_stats_param *prm) @@ -157,8
> +167,13 @@ stats_mem_init(struct cluster *cluster,
> 
>  	/* Fix up callback */
>  	fn = prm->fn;
> -	if (fn == NULL)
> -		fn = graph_cluster_stats_cb;
> +	if (fn == NULL) {
> +		const struct rte_graph *graph = cluster->graphs[0]->graph;
> +		if (graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +			fn = graph_cluster_stats_cb_dispatch;
> +		else
> +			fn = graph_cluster_stats_cb_rtc;
> +	}
> 
>  	cluster_node_size = sizeof(struct cluster_node);
>  	/* For a given cluster, max nodes will be the max number of graphs
> */ @@ -350,6 +365,8 @@ rte_graph_cluster_stats_create(const struct
> rte_graph_cluster_stats_param *prm)
>  			if (stats_mem_populate(&stats, graph_fp,
> graph_node))
>  				goto realloc_fail;
>  		}
> +		if (graph->graph->model ==
> RTE_GRAPH_MODEL_MCORE_DISPATCH)
> +			stats->dispatch = true;
>  	}
> 
>  	/* Finally copy to hugepage memory to avoid pressure on rte_realloc
> */ @@ -375,20 +392,18 @@ rte_graph_cluster_stats_destroy(struct
> rte_graph_cluster_stats *stat)  }
> 
>  static inline void
> -cluster_node_arregate_stats(struct cluster_node *cluster)
> +cluster_node_arregate_stats(struct cluster_node *cluster, bool
> +dispatch)
>  {
>  	uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
>  	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
>  	uint64_t sched_objs = 0, sched_fail = 0;
>  	struct rte_node *node;
>  	rte_node_t count;
> -	int model;
> 
> -	model =
> rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
>  	for (count = 0; count < cluster->nb_nodes; count++) {
>  		node = cluster->nodes[count];
> 
> -		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +		if (dispatch) {
>  			sched_objs += node->dispatch.total_sched_objs;
>  			sched_fail += node->dispatch.total_sched_fail;
>  		}
> @@ -403,7 +418,7 @@ cluster_node_arregate_stats(struct cluster_node
> *cluster)
>  	stat->objs = objs;
>  	stat->cycles = cycles;
> 
> -	if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
> +	if (dispatch) {
>  		stat->dispatch.sched_objs = sched_objs;
>  		stat->dispatch.sched_fail = sched_fail;
>  	}
> @@ -433,7 +448,7 @@ rte_graph_cluster_stats_get(struct
> rte_graph_cluster_stats *stat, bool skip_cb)
>  	cluster = stat->clusters;
> 
>  	for (count = 0; count < stat->max_nodes; count++) {
> -		cluster_node_arregate_stats(cluster);
> +		cluster_node_arregate_stats(cluster, stat->dispatch);
>  		if (!skip_cb)
>  			rc = stat->fn(!count, (count == stat->max_nodes - 1),
>  				      stat->cookie, &cluster->stat);
> --
> 2.44.0
  
David Marchand June 18, 2024, 1:15 p.m. UTC | #2
On Mon, Apr 1, 2024 at 10:37 PM Robin Jarry <rjarry@redhat.com> wrote:
>
> In rte_graph_cluster_stats_get, the walk model of the first graph is
> checked to determine if multi-core dispatch specific counters should be
> updated or not. This global list is accessed without any locks.
>
> If the global list is modified by another thread while
> rte_graph_cluster_stats_get is called, it can result in undefined
> behaviour.
>
> Adding a lock would make it impossible to call
> rte_graph_cluster_stats_get in packet processing code paths. Avoid
> accessing the global list instead by storing a bool field in the private
> rte_graph_cluster_stats structure.
>
> Also update the default callback to avoid accessing the global list and
> use a different default callback depending on the graph model.

Fixes: 358ff83fe88c ("graph: add stats for mcore dispatch model")
Cc: stable@dpdk.org

>
> Signed-off-by: Robin Jarry <rjarry@redhat.com>

Acked-by: Kiran Kumar Kokkilagadda <kirankumark@marvell.com>

Applied, thanks.


This may be worth a unit test, though I don't think it is trivial.
  

Patch

diff --git a/lib/graph/graph_stats.c b/lib/graph/graph_stats.c
index 2fb808b21ec5..d71451a17b95 100644
--- a/lib/graph/graph_stats.c
+++ b/lib/graph/graph_stats.c
@@ -34,6 +34,7 @@  struct __rte_cache_aligned rte_graph_cluster_stats {
 	uint32_t cluster_node_size; /* Size of struct cluster_node */
 	rte_node_t max_nodes;
 	int socket_id;
+	bool dispatch;
 	void *cookie;
 	size_t sz;
 
@@ -74,17 +75,16 @@  print_banner_dispatch(FILE *f)
 }
 
 static inline void
-print_banner(FILE *f)
+print_banner(FILE *f, bool dispatch)
 {
-	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
-	    RTE_GRAPH_MODEL_MCORE_DISPATCH)
+	if (dispatch)
 		print_banner_dispatch(f);
 	else
 		print_banner_default(f);
 }
 
 static inline void
-print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
+print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispatch)
 {
 	double objs_per_call, objs_per_sec, cycles_per_call, ts_per_hz;
 	const uint64_t prev_calls = stat->prev_calls;
@@ -104,8 +104,7 @@  print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
 	objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
 	objs_per_sec /= 1000000;
 
-	if (rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph) ==
-	    RTE_GRAPH_MODEL_MCORE_DISPATCH) {
+	if (dispatch) {
 		fprintf(f,
 			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
 			"|%-15" PRIu64 "|%-15" PRIu64
@@ -123,20 +122,17 @@  print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
 }
 
 static int
-graph_cluster_stats_cb(bool is_first, bool is_last, void *cookie,
+graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
 		       const struct rte_graph_cluster_node_stats *stat)
 {
 	FILE *f = cookie;
-	int model;
-
-	model = rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
 
 	if (unlikely(is_first))
-		print_banner(f);
+		print_banner(f, dispatch);
 	if (stat->objs)
-		print_node(f, stat);
+		print_node(f, stat, dispatch);
 	if (unlikely(is_last)) {
-		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
+		if (dispatch)
 			boarder_model_dispatch();
 		else
 			boarder();
@@ -145,6 +141,20 @@  graph_cluster_stats_cb(bool is_first, bool is_last, void *cookie,
 	return 0;
 };
 
+static int
+graph_cluster_stats_cb_rtc(bool is_first, bool is_last, void *cookie,
+			   const struct rte_graph_cluster_node_stats *stat)
+{
+	return graph_cluster_stats_cb(false, is_first, is_last, cookie, stat);
+};
+
+static int
+graph_cluster_stats_cb_dispatch(bool is_first, bool is_last, void *cookie,
+				const struct rte_graph_cluster_node_stats *stat)
+{
+	return graph_cluster_stats_cb(true, is_first, is_last, cookie, stat);
+};
+
 static struct rte_graph_cluster_stats *
 stats_mem_init(struct cluster *cluster,
 	       const struct rte_graph_cluster_stats_param *prm)
@@ -157,8 +167,13 @@  stats_mem_init(struct cluster *cluster,
 
 	/* Fix up callback */
 	fn = prm->fn;
-	if (fn == NULL)
-		fn = graph_cluster_stats_cb;
+	if (fn == NULL) {
+		const struct rte_graph *graph = cluster->graphs[0]->graph;
+		if (graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
+			fn = graph_cluster_stats_cb_dispatch;
+		else
+			fn = graph_cluster_stats_cb_rtc;
+	}
 
 	cluster_node_size = sizeof(struct cluster_node);
 	/* For a given cluster, max nodes will be the max number of graphs */
@@ -350,6 +365,8 @@  rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm)
 			if (stats_mem_populate(&stats, graph_fp, graph_node))
 				goto realloc_fail;
 		}
+		if (graph->graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
+			stats->dispatch = true;
 	}
 
 	/* Finally copy to hugepage memory to avoid pressure on rte_realloc */
@@ -375,20 +392,18 @@  rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat)
 }
 
 static inline void
-cluster_node_arregate_stats(struct cluster_node *cluster)
+cluster_node_arregate_stats(struct cluster_node *cluster, bool dispatch)
 {
 	uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
 	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
 	uint64_t sched_objs = 0, sched_fail = 0;
 	struct rte_node *node;
 	rte_node_t count;
-	int model;
 
-	model = rte_graph_worker_model_get(STAILQ_FIRST(graph_list_head_get())->graph);
 	for (count = 0; count < cluster->nb_nodes; count++) {
 		node = cluster->nodes[count];
 
-		if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
+		if (dispatch) {
 			sched_objs += node->dispatch.total_sched_objs;
 			sched_fail += node->dispatch.total_sched_fail;
 		}
@@ -403,7 +418,7 @@  cluster_node_arregate_stats(struct cluster_node *cluster)
 	stat->objs = objs;
 	stat->cycles = cycles;
 
-	if (model == RTE_GRAPH_MODEL_MCORE_DISPATCH) {
+	if (dispatch) {
 		stat->dispatch.sched_objs = sched_objs;
 		stat->dispatch.sched_fail = sched_fail;
 	}
@@ -433,7 +448,7 @@  rte_graph_cluster_stats_get(struct rte_graph_cluster_stats *stat, bool skip_cb)
 	cluster = stat->clusters;
 
 	for (count = 0; count < stat->max_nodes; count++) {
-		cluster_node_arregate_stats(cluster);
+		cluster_node_arregate_stats(cluster, stat->dispatch);
 		if (!skip_cb)
 			rc = stat->fn(!count, (count == stat->max_nodes - 1),
 				      stat->cookie, &cluster->stat);