[v2] graph: mcore: optimize graph search

Message ID 1731297797-3662-1-git-send-email-chcchc88@163.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v2] graph: mcore: optimize graph search |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-sample-apps-testing warning Testing issues
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-amd64-testing warning Testing issues
ci/iol-compile-arm64-testing success Testing PASS

Commit Message

Huichao Cai Nov. 11, 2024, 4:03 a.m. UTC
From: Huichao cai <chcchc88@163.com>

In the function __rte_graph_mcore_dispatch_sched_node_enqueue,
use a slower loop to search for the graph, modify the search logic
to record the result of the first search, and use this record for
subsequent searches to improve search speed.

Signed-off-by: Huichao cai <chcchc88@163.com>
---
 lib/graph/rte_graph_model_mcore_dispatch.c | 11 +++++++----
 lib/graph/rte_graph_worker_common.h        |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)
  

Comments

Jerin Jacob Nov. 11, 2024, 5:46 a.m. UTC | #1
> -----Original Message-----
> From: Huichao Cai <chcchc88@163.com>
> Sent: Monday, November 11, 2024 9:33 AM
> To: Jerin Jacob <jerinj@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>; yanzhirun_163@163.com
> Cc: dev@dpdk.org; Huichao cai <chcchc88@163.com>
> Subject: [EXTERNAL] [PATCH v2] graph: mcore: optimize graph search
> 
> From: Huichao cai <chcchc88@ 163. com> In the function
> __rte_graph_mcore_dispatch_sched_node_enqueue, use a slower loop to
> search for the graph, modify the search logic to record the result of the first
> search, and use this record for subsequent 
> From: Huichao cai <chcchc88@163.com>
> 
> In the function __rte_graph_mcore_dispatch_sched_node_enqueue,
> use a slower loop to search for the graph, modify the search logic to record the
> result of the first search, and use this record for subsequent searches to
> improve search speed.
> 
> Signed-off-by: Huichao cai <chcchc88@163.com>
> ---
>  lib/graph/rte_graph_model_mcore_dispatch.c | 11 +++++++----
>  lib/graph/rte_graph_worker_common.h        |  1 +
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.c
> b/lib/graph/rte_graph_model_mcore_dispatch.c
> index a590fc9..a81d338 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.c
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.c
> @@ -118,11 +118,14 @@
>  					      struct rte_graph_rq_head *rq)  {
>  	const unsigned int lcore_id = node->dispatch.lcore_id;
> -	struct rte_graph *graph;
> +	struct rte_graph *graph = node->dispatch.graph;
> 
> -	SLIST_FOREACH(graph, rq, next)
> -		if (graph->dispatch.lcore_id == lcore_id)
> -			break;
> +	if (unlikely((!graph) || (graph->dispatch.lcore_id != lcore_id))) {
> +		SLIST_FOREACH(graph, rq, next)
> +			if (graph->dispatch.lcore_id == lcore_id)
> +				break;
> +		node->dispatch.graph = graph;
> +	}
> 
>  	return graph != NULL ? __graph_sched_node_enqueue(node, graph) :
> false;  } diff --git a/lib/graph/rte_graph_worker_common.h
> b/lib/graph/rte_graph_worker_common.h
> index a518af2..4c2432b 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -110,6 +110,7 @@ struct __rte_cache_aligned rte_node {
>  			unsigned int lcore_id;  /**< Node running lcore. */
>  			uint64_t total_sched_objs; /**< Number of objects
> scheduled. */
>  			uint64_t total_sched_fail; /**< Number of scheduled
> failure. */
> +			struct rte_graph *graph;  /**< Graph corresponding to
> lcore_id. */

Need to conclude the ABI related discussion here before making change
 https://patches.dpdk.org/project/dpdk/patch/1730966682-2632-1-git-send-email-chcchc88@163.com/

>  		} dispatch;
>  	};
>  	rte_graph_off_t xstat_off; /**< Offset to xstat counters. */
> --
> 1.8.3.1
  
Huichao Cai Nov. 13, 2024, 9:19 a.m. UTC | #2
> [main] [dpdk.org] $ git diff
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index a518af2b2a..ec9a82186d 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -104,6 +104,7 @@ struct __rte_cache_aligned rte_node {
>         /** Original process function when pcap is enabled. */
>         rte_node_process_t original_process;

> +       alignas(RTE_CACHE_LINE_MIN_SIZE)
>         union {

Hi, Jerin
The C++standard cannot align anonymous unions. Do we need to fill in reserved fields in order to maintain union alignment with RTE-CAHE_LINE_LIN_SIZE bytes?

>                 /* Fast schedule area for mcore dispatch model */
>                 struct {
> @@ -112,6 +113,7 @@ struct __rte_cache_aligned rte_node {
>                         uint64_t total_sched_fail; /**< Number of scheduled failure. */
>                 } dispatch;
>         };
> +       alignas(RTE_CACHE_LINE_MIN_SIZE)
>         rte_graph_off_t xstat_off; /**< Offset to xstat counters. */
>         /* Fast path area  */
>         __extension__ struct __rte_cache_aligned {


FAILED: buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o 
ccache c++ -Ibuildtools/chkincs/chkincs-cpp.p -Ibuildtools/chkincs -I../buildtools/chkincs -Iexamples/l3fwd -I../examples/l3fwd -I../examples/common -Idrivers/bus/vdev -I../drivers/bus/vdev -I. -I.. -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/include -I../lib/eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include -I../kernel/linux -Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log -I../lib/log -Ilib/metrics -I../lib/metrics -Ilib/telemetry -I../lib/telemetry -Idrivers/bus/pci -I../drivers/bus/pci -I../drivers/bus/pci/linux -Ilib/pci -I../lib/pci -Idrivers/bus/vmbus -I../drivers/bus/vmbus -I../drivers/bus/vmbus/linux -Ilib/argparse -I../lib/argparse -Ilib/ptr_compress -I../lib/ptr_compress -Ilib/ring -I../lib/ring -Ilib/rcu -I../lib/rcu -Ilib/mempool -I../lib/mempool -Ilib/mbuf -I../lib/mbuf -Ilib/net -I../lib/net -Ilib/meter -I../lib/meter -Ilib/ethdev -I../lib/ethdev -Ilib/cmdline -I../lib/cmdline -Ilib/hash -I../lib/hash -Ilib/timer -I../lib/timer -Ilib/acl -I../lib/acl -Ilib/bbdev -I../lib/bbdev -Ilib/bitratestats -I../lib/bitratestats -Ilib/bpf -I../lib/bpf -Ilib/cfgfile -I../lib/cfgfile -Ilib/compressdev -I../lib/compressdev -Ilib/cryptodev -I../lib/cryptodev -Ilib/distributor -I../lib/distributor -Ilib/dmadev -I../lib/dmadev -Ilib/efd -I../lib/efd -Ilib/eventdev -I../lib/eventdev -Ilib/dispatcher -I../lib/dispatcher -Ilib/gpudev -I../lib/gpudev -Ilib/gro -I../lib/gro -Ilib/gso -I../lib/gso -Ilib/ip_frag -I../lib/ip_frag -Ilib/jobstats -I../lib/jobstats -Ilib/latencystats -I../lib/latencystats -Ilib/lpm -I../lib/lpm -Ilib/member -I../lib/member -Ilib/pcapng -I../lib/pcapng -Ilib/power -I../lib/power -Ilib/rawdev -I../lib/rawdev -Ilib/regexdev -I../lib/regexdev -Ilib/mldev -I../lib/mldev -Ilib/rib -I../lib/rib -Ilib/reorder -I../lib/reorder -Ilib/sched -I../lib/sched -Ilib/security -I../lib/security -Ilib/stack -I../lib/stack -Ilib/vhost -I../lib/vhost -Ilib/ipsec -I../lib/ipsec -Ilib/pdcp -I../lib/pdcp -Ilib/fib -I../lib/fib -Ilib/port -I../lib/port -Ilib/pdump -I../lib/pdump -Ilib/table -I../lib/table -Ilib/pipeline -I../lib/pipeline -Ilib/graph -I../lib/graph -Ilib/node -I../lib/node -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wnon-virtual-dtor -Wextra -Werror -g -include rte_config.h -march=corei7 -mrtm -MD -MQ buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o -MF buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o.d -o buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o -c buildtools/chkincs/chkincs-cpp.p/rte_graph_worker.cpp
In file included from /home/runner/work/dpdk/dpdk/lib/graph/rte_graph_model_rtc.h:6,
                 from /home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker.h:9,
                 from buildtools/chkincs/chkincs-cpp.p/rte_graph_worker.cpp:1:
/home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker_common.h:108:15: error: attribute ignored in declaration of ‘union rte_node::<unnamed>’ [-Werror=attributes]
  108 |         union {
      |               ^
/home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker_common.h:108:15: note: attribute for ‘union rte_node::<unnamed>’ must follow the ‘union’ keyword
cc1plus: all warnings being treated as errors
[5410/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_table_lpm.cpp.o
[5411/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_port_in_action.cpp.o
[5412/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_pipeline.cpp.o
[5413/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_table_action.cpp.o
[5414/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_swx_ipsec.cpp.o
ninja: build stopped: subcommand failed.
  

Patch

diff --git a/lib/graph/rte_graph_model_mcore_dispatch.c b/lib/graph/rte_graph_model_mcore_dispatch.c
index a590fc9..a81d338 100644
--- a/lib/graph/rte_graph_model_mcore_dispatch.c
+++ b/lib/graph/rte_graph_model_mcore_dispatch.c
@@ -118,11 +118,14 @@ 
 					      struct rte_graph_rq_head *rq)
 {
 	const unsigned int lcore_id = node->dispatch.lcore_id;
-	struct rte_graph *graph;
+	struct rte_graph *graph = node->dispatch.graph;
 
-	SLIST_FOREACH(graph, rq, next)
-		if (graph->dispatch.lcore_id == lcore_id)
-			break;
+	if (unlikely((!graph) || (graph->dispatch.lcore_id != lcore_id))) {
+		SLIST_FOREACH(graph, rq, next)
+			if (graph->dispatch.lcore_id == lcore_id)
+				break;
+		node->dispatch.graph = graph;
+	}
 
 	return graph != NULL ? __graph_sched_node_enqueue(node, graph) : false;
 }
diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
index a518af2..4c2432b 100644
--- a/lib/graph/rte_graph_worker_common.h
+++ b/lib/graph/rte_graph_worker_common.h
@@ -110,6 +110,7 @@  struct __rte_cache_aligned rte_node {
 			unsigned int lcore_id;  /**< Node running lcore. */
 			uint64_t total_sched_objs; /**< Number of objects scheduled. */
 			uint64_t total_sched_fail; /**< Number of scheduled failure. */
+			struct rte_graph *graph;  /**< Graph corresponding to lcore_id. */
 		} dispatch;
 	};
 	rte_graph_off_t xstat_off; /**< Offset to xstat counters. */