[v2,3/3] examples/l3fwd: eliminate unnecessary reloads in loop
Checks
Commit Message
The number of RX queues and the number of TX ports in the lcore config
are constant while the l3fwd application is running, but the compiler
has no way of knowing this.
Copy these values from the lcore config into local variables and use the
local variables as the loop bounds. The compiler can see that the local
variables never change, so the reload of the qconf fields on each
iteration can be eliminated.
The change showed a 1.8% performance uplift in a single-core, single-port,
single-queue test on the N1SDP platform with an MLX5 NIC.
Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
examples/l3fwd/l3fwd_lpm.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
Comments
On Tue, Jun 1, 2021 at 1:27 PM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Number of rx queue and number of rx port in lcore config are constants
> during the period of l3 forward application running. But compiler has
> no this information.
>
> Copied values from lcore config to local variables and used the local
> variables for iteration. Compiler can see that the local variables are
> not changed, so qconf reloads at each iteration can be eliminated.
>
> The change showed 1.8% performance uplift in single core, single port,
> single queue test on N1SDP platform with MLX5 NIC.
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
No performance regression with octeontx2.
Acked-by: Jerin Jacob <jerinj@marvell.com>
> ---
> examples/l3fwd/l3fwd_lpm.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
> index 427c72b1d2..ff1c18a442 100644
> --- a/examples/l3fwd/l3fwd_lpm.c
> +++ b/examples/l3fwd/l3fwd_lpm.c
> @@ -154,14 +154,16 @@ lpm_main_loop(__rte_unused void *dummy)
> lcore_id = rte_lcore_id();
> qconf = &lcore_conf[lcore_id];
>
> - if (qconf->n_rx_queue == 0) {
> + const uint16_t n_rx_q = qconf->n_rx_queue;
> + const uint16_t n_tx_p = qconf->n_tx_port;
> + if (n_rx_q == 0) {
> RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
> return 0;
> }
>
> RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
>
> - for (i = 0; i < qconf->n_rx_queue; i++) {
> + for (i = 0; i < n_rx_q; i++) {
>
> portid = qconf->rx_queue_list[i].port_id;
> queueid = qconf->rx_queue_list[i].queue_id;
> @@ -181,7 +183,7 @@ lpm_main_loop(__rte_unused void *dummy)
> diff_tsc = cur_tsc - prev_tsc;
> if (unlikely(diff_tsc > drain_tsc)) {
>
> - for (i = 0; i < qconf->n_tx_port; ++i) {
> + for (i = 0; i < n_tx_p; ++i) {
> portid = qconf->tx_port_id[i];
> if (qconf->tx_mbufs[portid].len == 0)
> continue;
> @@ -197,7 +199,7 @@ lpm_main_loop(__rte_unused void *dummy)
> /*
> * Read packet from RX queues
> */
> - for (i = 0; i < qconf->n_rx_queue; ++i) {
> + for (i = 0; i < n_rx_q; ++i) {
> portid = qconf->rx_queue_list[i].port_id;
> queueid = qconf->rx_queue_list[i].queue_id;
> nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
> --
> 2.25.1
>
@@ -154,14 +154,16 @@ lpm_main_loop(__rte_unused void *dummy)
lcore_id = rte_lcore_id();
qconf = &lcore_conf[lcore_id];
- if (qconf->n_rx_queue == 0) {
+ const uint16_t n_rx_q = qconf->n_rx_queue;
+ const uint16_t n_tx_p = qconf->n_tx_port;
+ if (n_rx_q == 0) {
RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
return 0;
}
RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
- for (i = 0; i < qconf->n_rx_queue; i++) {
+ for (i = 0; i < n_rx_q; i++) {
portid = qconf->rx_queue_list[i].port_id;
queueid = qconf->rx_queue_list[i].queue_id;
@@ -181,7 +183,7 @@ lpm_main_loop(__rte_unused void *dummy)
diff_tsc = cur_tsc - prev_tsc;
if (unlikely(diff_tsc > drain_tsc)) {
- for (i = 0; i < qconf->n_tx_port; ++i) {
+ for (i = 0; i < n_tx_p; ++i) {
portid = qconf->tx_port_id[i];
if (qconf->tx_mbufs[portid].len == 0)
continue;
@@ -197,7 +199,7 @@ lpm_main_loop(__rte_unused void *dummy)
/*
* Read packet from RX queues
*/
- for (i = 0; i < qconf->n_rx_queue; ++i) {
+ for (i = 0; i < n_rx_q; ++i) {
portid = qconf->rx_queue_list[i].port_id;
queueid = qconf->rx_queue_list[i].queue_id;
nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,