[v2] graph: mcore: optimize graph search
Checks
Commit Message
From: Huichao cai <chcchc88@163.com>
In the function __rte_graph_mcore_dispatch_sched_node_enqueue,
a slow loop is used to search for the graph. Modify the search logic
to record the result of the first search, and use this record for
subsequent searches to improve search speed.
Signed-off-by: Huichao cai <chcchc88@163.com>
---
lib/graph/rte_graph_model_mcore_dispatch.c | 11 +++++++----
lib/graph/rte_graph_worker_common.h | 1 +
2 files changed, 8 insertions(+), 4 deletions(-)
Comments
> -----Original Message-----
> From: Huichao Cai <chcchc88@163.com>
> Sent: Monday, November 11, 2024 9:33 AM
> To: Jerin Jacob <jerinj@marvell.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>; Nithin Kumar Dabilpuram
> <ndabilpuram@marvell.com>; yanzhirun_163@163.com
> Cc: dev@dpdk.org; Huichao cai <chcchc88@163.com>
> Subject: [EXTERNAL] [PATCH v2] graph: mcore: optimize graph search
>
> From: Huichao cai <chcchc88@163.com> In the function
> __rte_graph_mcore_dispatch_sched_node_enqueue, use a slower loop to
> search for the graph, modify the search logic to record the result of the first
> search, and use this record for subsequent
> From: Huichao cai <chcchc88@163.com>
>
> In the function __rte_graph_mcore_dispatch_sched_node_enqueue,
> use a slower loop to search for the graph, modify the search logic to record the
> result of the first search, and use this record for subsequent searches to
> improve search speed.
>
> Signed-off-by: Huichao cai <chcchc88@163.com>
> ---
> lib/graph/rte_graph_model_mcore_dispatch.c | 11 +++++++----
> lib/graph/rte_graph_worker_common.h | 1 +
> 2 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.c
> b/lib/graph/rte_graph_model_mcore_dispatch.c
> index a590fc9..a81d338 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.c
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.c
> @@ -118,11 +118,14 @@
> struct rte_graph_rq_head *rq) {
> const unsigned int lcore_id = node->dispatch.lcore_id;
> - struct rte_graph *graph;
> + struct rte_graph *graph = node->dispatch.graph;
>
> - SLIST_FOREACH(graph, rq, next)
> - if (graph->dispatch.lcore_id == lcore_id)
> - break;
> + if (unlikely((!graph) || (graph->dispatch.lcore_id != lcore_id))) {
> + SLIST_FOREACH(graph, rq, next)
> + if (graph->dispatch.lcore_id == lcore_id)
> + break;
> + node->dispatch.graph = graph;
> + }
>
> return graph != NULL ? __graph_sched_node_enqueue(node, graph) :
> false; } diff --git a/lib/graph/rte_graph_worker_common.h
> b/lib/graph/rte_graph_worker_common.h
> index a518af2..4c2432b 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -110,6 +110,7 @@ struct __rte_cache_aligned rte_node {
> unsigned int lcore_id; /**< Node running lcore. */
> uint64_t total_sched_objs; /**< Number of objects
> scheduled. */
> uint64_t total_sched_fail; /**< Number of scheduled
> failure. */
> + struct rte_graph *graph; /**< Graph corresponding to
> lcore_id. */
We need to conclude the ABI-related discussion here before making this change:
https://patches.dpdk.org/project/dpdk/patch/1730966682-2632-1-git-send-email-chcchc88@163.com/
> } dispatch;
> };
> rte_graph_off_t xstat_off; /**< Offset to xstat counters. */
> --
> 1.8.3.1
> [main] [dpdk.org] $ git diff
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index a518af2b2a..ec9a82186d 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -104,6 +104,7 @@ struct __rte_cache_aligned rte_node {
> /** Original process function when pcap is enabled. */
> rte_node_process_t original_process;
> + alignas(RTE_CACHE_LINE_MIN_SIZE)
> union {
Hi, Jerin
The C++ standard cannot align anonymous unions. Do we need to fill in reserved fields in order to keep the union aligned to RTE_CACHE_LINE_MIN_SIZE bytes?
> /* Fast schedule area for mcore dispatch model */
> struct {
> @@ -112,6 +113,7 @@ struct __rte_cache_aligned rte_node {
> uint64_t total_sched_fail; /**< Number of scheduled failure. */
> } dispatch;
> };
> + alignas(RTE_CACHE_LINE_MIN_SIZE)
> rte_graph_off_t xstat_off; /**< Offset to xstat counters. */
> /* Fast path area */
> __extension__ struct __rte_cache_aligned {
FAILED: buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o
ccache c++ -Ibuildtools/chkincs/chkincs-cpp.p -Ibuildtools/chkincs -I../buildtools/chkincs -Iexamples/l3fwd -I../examples/l3fwd -I../examples/common -Idrivers/bus/vdev -I../drivers/bus/vdev -I. -I.. -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/include -I../lib/eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include -I../kernel/linux -Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log -I../lib/log -Ilib/metrics -I../lib/metrics -Ilib/telemetry -I../lib/telemetry -Idrivers/bus/pci -I../drivers/bus/pci -I../drivers/bus/pci/linux -Ilib/pci -I../lib/pci -Idrivers/bus/vmbus -I../drivers/bus/vmbus -I../drivers/bus/vmbus/linux -Ilib/argparse -I../lib/argparse -Ilib/ptr_compress -I../lib/ptr_compress -Ilib/ring -I../lib/ring -Ilib/rcu -I../lib/rcu -Ilib/mempool -I../lib/mempool -Ilib/mbuf -I../lib/mbuf -Ilib/net -I../lib/net -Ilib/meter -I../lib/meter -Ilib/ethdev -I../lib/ethdev -Ilib/cmdline -I../lib/cmdline -Ilib/hash -I../lib/hash -Ilib/timer -I../lib/timer -Ilib/acl -I../lib/acl -Ilib/bbdev -I../lib/bbdev -Ilib/bitratestats -I../lib/bitratestats -Ilib/bpf -I../lib/bpf -Ilib/cfgfile -I../lib/cfgfile -Ilib/compressdev -I../lib/compressdev -Ilib/cryptodev -I../lib/cryptodev -Ilib/distributor -I../lib/distributor -Ilib/dmadev -I../lib/dmadev -Ilib/efd -I../lib/efd -Ilib/eventdev -I../lib/eventdev -Ilib/dispatcher -I../lib/dispatcher -Ilib/gpudev -I../lib/gpudev -Ilib/gro -I../lib/gro -Ilib/gso -I../lib/gso -Ilib/ip_frag -I../lib/ip_frag -Ilib/jobstats -I../lib/jobstats -Ilib/latencystats -I../lib/latencystats -Ilib/lpm -I../lib/lpm -Ilib/member -I../lib/member -Ilib/pcapng -I../lib/pcapng -Ilib/power -I../lib/power -Ilib/rawdev -I../lib/rawdev -Ilib/regexdev -I../lib/regexdev -Ilib/mldev -I../lib/mldev -Ilib/rib -I../lib/rib -Ilib/reorder -I../lib/reorder -Ilib/sched -I../lib/sched -Ilib/security -I../lib/security -Ilib/stack -I../lib/stack -Ilib/vhost -I../lib/vhost -Ilib/ipsec 
-I../lib/ipsec -Ilib/pdcp -I../lib/pdcp -Ilib/fib -I../lib/fib -Ilib/port -I../lib/port -Ilib/pdump -I../lib/pdump -Ilib/table -I../lib/table -Ilib/pipeline -I../lib/pipeline -Ilib/graph -I../lib/graph -Ilib/node -I../lib/node -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wnon-virtual-dtor -Wextra -Werror -g -include rte_config.h -march=corei7 -mrtm -MD -MQ buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o -MF buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o.d -o buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_graph_worker.cpp.o -c buildtools/chkincs/chkincs-cpp.p/rte_graph_worker.cpp
In file included from /home/runner/work/dpdk/dpdk/lib/graph/rte_graph_model_rtc.h:6,
from /home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker.h:9,
from buildtools/chkincs/chkincs-cpp.p/rte_graph_worker.cpp:1:
/home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker_common.h:108:15: error: attribute ignored in declaration of ‘union rte_node::<unnamed>’ [-Werror=attributes]
108 | union {
| ^
/home/runner/work/dpdk/dpdk/lib/graph/rte_graph_worker_common.h:108:15: note: attribute for ‘union rte_node::<unnamed>’ must follow the ‘union’ keyword
cc1plus: all warnings being treated as errors
[5410/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_table_lpm.cpp.o
[5411/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_port_in_action.cpp.o
[5412/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_pipeline.cpp.o
[5413/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_table_action.cpp.o
[5414/6569] Compiling C++ object buildtools/chkincs/chkincs-cpp.p/meson-generated_rte_swx_ipsec.cpp.o
ninja: build stopped: subcommand failed.
@@ -118,11 +118,14 @@
struct rte_graph_rq_head *rq)
{
const unsigned int lcore_id = node->dispatch.lcore_id;
- struct rte_graph *graph;
+ struct rte_graph *graph = node->dispatch.graph;
- SLIST_FOREACH(graph, rq, next)
- if (graph->dispatch.lcore_id == lcore_id)
- break;
+ if (unlikely((!graph) || (graph->dispatch.lcore_id != lcore_id))) {
+ SLIST_FOREACH(graph, rq, next)
+ if (graph->dispatch.lcore_id == lcore_id)
+ break;
+ node->dispatch.graph = graph;
+ }
return graph != NULL ? __graph_sched_node_enqueue(node, graph) : false;
}
@@ -110,6 +110,7 @@ struct __rte_cache_aligned rte_node {
unsigned int lcore_id; /**< Node running lcore. */
uint64_t total_sched_objs; /**< Number of objects scheduled. */
uint64_t total_sched_fail; /**< Number of scheduled failure. */
+ struct rte_graph *graph; /**< Graph corresponding to lcore_id. */
} dispatch;
};
rte_graph_off_t xstat_off; /**< Offset to xstat counters. */