[v6,2/2] Add l2reflect measurement application
Checks
Commit Message
The l2reflect application implements a ping-pong benchmark to
measure the latency between two instances. For communication,
we use raw ethernet and send one packet at a time. The timing data
is collected locally and min/max/avg values are displayed in a TUI.
Finally, a histogram of the latencies is printed which can be
further processed with the jitterdebugger visualization scripts.
To debug latency spikes, a max threshold can be defined.
If it is hit, a trace point is created on both instances.
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
Signed-off-by: Henning Schild <henning.schild@siemens.com>
---
app/l2reflect/colors.c | 34 ++
app/l2reflect/colors.h | 19 +
app/l2reflect/l2reflect.h | 53 ++
app/l2reflect/main.c | 1007 +++++++++++++++++++++++++++++++++++++
app/l2reflect/meson.build | 21 +
app/l2reflect/payload.h | 26 +
app/l2reflect/stats.c | 225 +++++++++
app/l2reflect/stats.h | 67 +++
app/l2reflect/utils.c | 67 +++
app/l2reflect/utils.h | 20 +
app/meson.build | 1 +
11 files changed, 1540 insertions(+)
create mode 100644 app/l2reflect/colors.c
create mode 100644 app/l2reflect/colors.h
create mode 100644 app/l2reflect/l2reflect.h
create mode 100644 app/l2reflect/main.c
create mode 100644 app/l2reflect/meson.build
create mode 100644 app/l2reflect/payload.h
create mode 100644 app/l2reflect/stats.c
create mode 100644 app/l2reflect/stats.h
create mode 100644 app/l2reflect/utils.c
create mode 100644 app/l2reflect/utils.h
Comments
Hi Felix,
First, I support the idea of having the l2reflect application part of
the DPDK repository.
Please note CI failed to build it on different platforms:
http://mails.dpdk.org/archives/test-report/2022-September/304617.html
It also fails to build on my Fc35 machine:
[3237/3537] Compiling C object app/dpdk-l2reflect.p/l2reflect_main.c.o
../app/l2reflect/main.c: In function ‘l2reflect_main_loop’:
../app/l2reflect/main.c:560:19: warning: array subscript ‘uint64_t {aka
long unsigned int}[0]’ is partly outside array bounds of ‘struct
rte_ether_addr[1]’ [-Warray-bounds]
560 | i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) &
MAC_ADDR_CMP) >
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../app/l2reflect/main.c:110:23: note: while referencing
‘l2reflect_port_eth_addr’
110 | struct rte_ether_addr l2reflect_port_eth_addr;
| ^~~~~~~~~~~~~~~~~~~~~~~
../app/l2reflect/main.c:561:27: warning: array subscript ‘uint64_t {aka
long unsigned int}[0]’ is partly outside array bounds of ‘struct
rte_ether_addr[1]’ [-Warray-bounds]
561 | (*((uint64_t
*)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../app/l2reflect/main.c:111:23: note: while referencing
‘l2reflect_remote_eth_addr’
111 | struct rte_ether_addr l2reflect_remote_eth_addr;
| ^~~~~~~~~~~~~~~~~
Some more comments inline:
On 9/2/22 10:45, Felix Moessbauer wrote:
> The l2reflect application implements a ping-pong benchmark to
> measure the latency between two instances. For communication,
> we use raw ethernet and send one packet at a time. The timing data
> is collected locally and min/max/avg values are displayed in a TUI.
> Finally, a histogram of the latencies is printed which can be
> further processed with the jitterdebugger visualization scripts.
> To debug latency spikes, a max threshold can be defined.
> If it is hit, a trace point is created on both instances.
>
> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> Signed-off-by: Henning Schild <henning.schild@siemens.com>
> ---
> app/l2reflect/colors.c | 34 ++
> app/l2reflect/colors.h | 19 +
> app/l2reflect/l2reflect.h | 53 ++
> app/l2reflect/main.c | 1007 +++++++++++++++++++++++++++++++++++++
> app/l2reflect/meson.build | 21 +
> app/l2reflect/payload.h | 26 +
> app/l2reflect/stats.c | 225 +++++++++
> app/l2reflect/stats.h | 67 +++
> app/l2reflect/utils.c | 67 +++
> app/l2reflect/utils.h | 20 +
> app/meson.build | 1 +
> 11 files changed, 1540 insertions(+)
> create mode 100644 app/l2reflect/colors.c
> create mode 100644 app/l2reflect/colors.h
> create mode 100644 app/l2reflect/l2reflect.h
> create mode 100644 app/l2reflect/main.c
> create mode 100644 app/l2reflect/meson.build
> create mode 100644 app/l2reflect/payload.h
> create mode 100644 app/l2reflect/stats.c
> create mode 100644 app/l2reflect/stats.h
> create mode 100644 app/l2reflect/utils.c
> create mode 100644 app/l2reflect/utils.h
If we agree to have this application in app/ directory,
I think you'll have to add documentation for this new tool in
doc/guides/tools/.
> diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
> new file mode 100644
> index 0000000000..af881d8788
> --- /dev/null
> +++ b/app/l2reflect/colors.c
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +
> +#include "colors.h"
> +
> +const struct color_palette *colors;
> +
> +static const struct color_palette color_palette_default = {
> + .red = "\x1b[01;31m",
> + .green = "\x1b[01;32m",
> + .yellow = "\x1b[01;33m",
> + .blue = "\x1b[01;34m",
> + .magenta = "\x1b[01;35m",
> + .cyan = "\x1b[01;36m",
> + .reset = "\x1b[0m"
> +};
> +
> +static const struct color_palette color_palette_bw = { .red = "",
> + .green = "",
> + .yellow = "",
> + .blue = "",
> + .magenta = "",
> + .cyan = "",
> + .reset = "" };
> +
> +void
> +enable_colors(int enable)
> +{
> + if (enable)
> + colors = &color_palette_default;
> + else
> + colors = &color_palette_bw;
> +}
> diff --git a/app/l2reflect/colors.h b/app/l2reflect/colors.h
> new file mode 100644
> index 0000000000..346547138b
> --- /dev/null
> +++ b/app/l2reflect/colors.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +#ifndef _L2REFLECT_COLORS_H_
> +#define _L2REFLECT_COLORS_H_
> +
> +/* posix terminal colors */
> +struct color_palette {
> + const char *red, *green, *yellow, *blue, *magenta, *cyan, *reset;
> +};
> +
> +/* ptr to the current tui color palette */
> +extern const struct color_palette *colors;
> +
> +/* disable colored output */
> +void
> +enable_colors(int enable);
> +
> +#endif /* _L2REFLECT_COLORS_H_ */
> diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
> new file mode 100644
> index 0000000000..922bd7c281
> --- /dev/null
> +++ b/app/l2reflect/l2reflect.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +
> +#include <stdatomic.h>
> +
> +#ifndef _L2REFLECT_L2REFLECT_H_
> +#define _L2REFLECT_L2REFLECT_H_
> +
> +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
> +
> +/* max size that common 1G NICs support */
> +#define MAX_JUMBO_PKT_LEN 9600
> +
> +/* Used to compare MAC addresses. */
> +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
> +
> +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
> +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
> +
> +enum {
> + TRACE_TYPE_DATA,
> + TRACE_TYPE_HELO,
> + TRACE_TYPE_EHLO,
> + TRACE_TYPE_RSET,
> + TRACE_TYPE_QUIT,
> +};
> +
> +enum STATE {
> + /* elect the initial sender */
> + S_ELECT_LEADER = 1,
> + /* reset the counters */
> + S_RESET_TRX = 2,
> + /* measurement S_RUNNING */
> + S_RUNNING = 4,
> + /* terminated by local event */
> + S_LOCAL_TERM = 8,
> + /* terminated by remote event */
> + S_REMOTE_TERM = 16
> +};
> +
> +extern int l2reflect_hist;
> +extern unsigned int l2reflect_hist_buckets;
> +extern atomic_int l2reflect_output_hist;
> +extern int l2reflect_interrupt;
> +extern uint64_t l2reflect_sleep_msec;
> +extern uint64_t l2reflect_pkt_bytes;
> +extern uint16_t l2reflect_port_number;
> +extern atomic_int l2reflect_state;
> +extern struct rte_ether_addr l2reflect_port_eth_addr;
> +extern struct rte_ether_addr l2reflect_remote_eth_addr;
> +
> +#endif /* _L2REFLECT_L2REFLECT_H_ */
> diff --git a/app/l2reflect/main.c b/app/l2reflect/main.c
> new file mode 100644
> index 0000000000..33a87e8fad
> --- /dev/null
> +++ b/app/l2reflect/main.c
> @@ -0,0 +1,1007 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + *
> + * The l2reflect application implements a ping-pong benchmark to
> + * measure the latency between two instances. For communication,
> + * we use raw ethernet and send one packet at a time. The timing data
> + * is collected locally and min/max/avg values are displayed in a TUI.
> + * Finally, a histogram of the latencies is printed which can be
> + * further processed with the jitterdebugger visualization scripts.
> + * To debug latency spikes, a max threshold can be defined.
> + * If it is hit, a trace point is created on both instances.
> + *
> + * Examples:
> + * launch (non-rt kernel): l2reflect --lcores 0@0,1@6 -n 1
> + * launch (rt kernel): l2reflect --lcores 0@0,1@6 -n 1 -- -P 50 -r -l
> + *
> + * For histogram data, launch with -H <usec> -F <output file>, e.g.
> + * -H 10 -F histogram.json for a histogram with 10 usec buckets which
> + * is written to a histogram.json file. This file can then be visualized
> + * using the jitterdebugger plotting scripts:
> + * jitterplot hist histogram.json
> + *
> + * While the application is running, it can be controlled by sending
> + * signals to one of the processes:
> + * - SIGUSR1: reset the min/max/avg on both instances
> + * - SIGUSR2: output / write the current histogram
> + * - SIGHUP/SIGINT: gracefully terminate both instances
> + *
> + * Note on wiring:
> + * The l2reflect application sends the packets via a physical ethernet
> + * interface. When running both instances on a single system, at least
> + * two dedicated physical ports and a (physical) loopback between them
> + * is required.
Above text could be used as a basis for the documentation.
> + */
> +
> +#include <stdio.h>
> +#include <errno.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <time.h>
> +#include <inttypes.h>
> +#include <getopt.h>
> +#include <sys/signal.h>
> +#include <assert.h>
> +#include <unistd.h>
> +#ifdef HAS_SYS_IO
> +#include <sys/io.h>
> +#endif
> +#include <sched.h>
> +#include <sys/mman.h>
> +#include <stdatomic.h>
> +
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_log.h>
> +#include <rte_memory.h>
> +#include <rte_memcpy.h>
> +#include <rte_memzone.h>
> +#include <rte_eal.h>
> +#include <rte_eal_trace.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_atomic.h>
> +#include <rte_cycles.h>
> +#include <rte_prefetch.h>
> +#include <rte_lcore.h>
> +#include <rte_per_lcore.h>
> +#include <rte_branch_prediction.h>
> +#include <rte_interrupts.h>
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev.h>
> +#include <rte_ring.h>
> +#include <rte_mempool.h>
> +#include <rte_mbuf.h>
> +
> +#include "l2reflect.h"
> +#include "payload.h"
> +#include "utils.h"
> +#include "colors.h"
> +#include "stats.h"
> +
> +#define NSEC_PER_SEC 1000000000
> +
> +#define NB_MBUF 2047
> +
> +#define MAX_PKT_BURST 32
> +/* warmup a few round before starting the measurement */
> +#define WARMUP_ROUNDS 42
Would it make sense to have this as default value and provide the user
with the possibility to configure it via command line?
> +
> +/* break after one second */
> +#define DEFAULT_BREAKVAL_USEC 1000000ull
> +/* break if no rx for more than this rounds */
> +#define RX_TIMEOUT_MASK ~0xFFFFFull
> +
> +/* delay between two election packets */
> +#define DELAY_ELECTION_MS 500
> +
> +int l2reflect_hist;
> +unsigned int l2reflect_hist_buckets = HIST_NUM_BUCKETS_DEFAULT;
> +atomic_int l2reflect_output_hist;
> +int l2reflect_fake_mac;
> +int l2reflect_interrupt;
> +uint64_t l2reflect_sleep_msec;
> +uint64_t l2reflect_pkt_bytes = 64;
> +uint16_t l2reflect_port_number;
> +atomic_int l2reflect_state;
> +struct rte_ether_addr l2reflect_port_eth_addr;
> +struct rte_ether_addr l2reflect_remote_eth_addr;
> +
> +static struct timespec last_sent, last_recv;
> +static int quiet, disable_int, priority, policy, l2reflect_mlock;
> +
> +static atomic_int sleep_start;
> +static uint64_t rounds;
> +
> +/* Configurable number of RX/TX ring descriptors */
> +#define RTE_TEST_RX_DESC_DEFAULT 128
> +#define RTE_TEST_TX_DESC_DEFAULT 128
> +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
> +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
> +
> +static struct rte_eth_conf port_conf = {
> + .rxmode = {
> + .split_hdr_size = 0,
> + },
> + .txmode = {
> + .mq_mode = RTE_ETH_MQ_TX_NONE,
> + },
> +};
> +
> +static uint32_t l2reflect_q;
> +static uint64_t l2reflect_break_usec = DEFAULT_BREAKVAL_USEC;
> +
> +static struct rte_ether_addr ether_bcast_addr = {
> + .addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
> +};
> +
> +struct rte_mempool *l2reflect_pktmbuf_pool;
> +
> +static void
> +l2reflect_usage(const char *prgname)
> +{
> + printf("%s [EAL options] -- [-p PORT] -P [PRIO] [-b USEC] [-n SIZE] [-r] [-f] [-l]"
> + "[-q] [-d] [-H USEC] [-B NUM] [-F FILE] [-S] [-i MSEC] [-m] [-c] [-h]\n"
> + " -p PORT: port to configure\n"
> + " -P PRIO: scheduling priority to use\n"
> + " -b USEC: break when latency > USEC\n"
> + " -n SIZE: size of packet in bytes [%i,%i]\n"
> + " (when using jumbo frames, sender and receiver values have to match)\n"
> + " -r: scheduling policy SCHED_RR\n"
> + " -f: scheduling policy SCHED_FIFO\n"
> + " -l: lock memory (mlockall)\n"
> + " -q: quiet, do not print stats\n"
> +#ifdef HAS_SYS_IO
> + " -d: ignore maskable interrupts\n"
> +#endif
> + " -H USEC: create histogram of latencies with USEC time slices\n"
> + " -B NUM: number of histogram buckets\n"
> + " -F FILE: write histogram to file\n"
> + " -S: start processing threads in sleep, wake with SIGCONT\n"
> + " -i MSEC: use interrupts instead of polling (cont. on interrupt or after MSEC)\n"
> + " -m: fake the source mac addr by adding 1 to the last tuple\n"
> + " -c: disable colored output\n"
> + " -h: display help message\n",
> + prgname, RTE_ETHER_MIN_LEN, MAX_JUMBO_PKT_LEN);
> +}
> +
> +static int
> +check_opts_for_help(int argc, char **argv, void(*display_help)(const char *))
> +{
> + if (argc > 2 && !strncmp(argv[1], "--", 3)) {
> + if (!strncmp(argv[2], "-h", 3) || !strncmp(argv[2], "--help", 7)) {
> + display_help(argv[0]);
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> +/* Parse the argument given in the command line of the application */
> +static int
> +l2reflect_parse_args(int argc, char **argv)
> +{
> + int opt, ret;
> + char **argvopt;
> + int option_index;
> + int opt_colors = 1;
> + char *prgname = argv[0];
> + static struct option lgopts[] = { { NULL, 0, 0, 0 } };
> +
> + argvopt = argv;
> + policy = SCHED_OTHER;
> + hist_filename = NULL;
> + l2reflect_output_hist = 0;
> +
> + while ((opt = getopt_long(argc, argvopt, "p:P:b:H:B:F:i:n:qdrflScm", lgopts,
> + &option_index)) != EOF) {
> + switch (opt) {
> + /* port */
> + case 'p':
> + l2reflect_port_number =
> + (uint16_t)strtoul(optarg, NULL, 10);
> + break;
> + case 'P':
> + priority = strtoul(optarg, NULL, 10);
> + if (priority > 0) {
> + if (policy == SCHED_OTHER)
> + policy = SCHED_RR;
> + l2reflect_mlock = 1;
> + }
> + break;
> + case 'b':
> + l2reflect_break_usec = strtoul(optarg, NULL, 10);
> + break;
> + case 'S':
> + sleep_start = 1;
> + break;
> + case 'q':
> + quiet = 1;
> + break;
> + case 'd':
> + disable_int = 1;
> + break;
> + case 'r':
> + policy = SCHED_RR;
> + break;
> + case 'f':
> + policy = SCHED_FIFO;
> + break;
> + case 'l':
> + l2reflect_mlock = 1;
> + break;
> + case 'H':
> + l2reflect_hist = 1;
> + hist_bucket_usec = strtoul(optarg, NULL, 10);
> +#ifndef RTE_HAS_JANSSON
> + printf("not compiled with cjson support\n");
s/cjson/jansson/
> + return -1;
> +#endif
> + break;
> + case 'B':
> + l2reflect_hist_buckets = strtoul(optarg, NULL, 10);
> + break;
> + case 'F':
> + hist_filename = strndup(optarg, 128);
> + break;
> + case 'i':
> + l2reflect_interrupt = 1;
> + l2reflect_sleep_msec = strtoul(optarg, NULL, 10);
> + break;
> + case 'n':
> + l2reflect_pkt_bytes = strtoull(optarg, NULL, 10);
> + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
> + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN) {
> + printf("packet size %" PRIu64 " not valid\n", l2reflect_pkt_bytes);
> + return -1;
> + }
> + if (l2reflect_pkt_bytes > RTE_MBUF_DEFAULT_DATAROOM) {
> + printf("NOT IMPLEMENTED. Packet size %" PRIu64 " requires segmented buffers.\n",
> + l2reflect_pkt_bytes);
> + return -1;
> + }
> + break;
> + case 'c':
> + opt_colors = 0;
> + break;
> + case 'm':
> + l2reflect_fake_mac = 1;
> + break;
> + default:
> + l2reflect_usage(prgname);
> + return -1;
> + }
> + }
> +
> + if (optind >= 0)
> + argv[optind - 1] = prgname;
> +
> + if (hist_filename && !l2reflect_hist) {
> + printf("-F switch requires -H switch as well\n");
> + return -1;
> + }
> +
> + /* output is redirected, disable coloring */
> + if (!isatty(fileno(stdout)))
> + opt_colors = 0;
> +
> + enable_colors(opt_colors);
> +
> + ret = optind - 1;
> + optind = 0; /* reset getopt lib */
> + return ret;
> +}
> +
> +/* Send a burst of one packet */
> +static inline int
> +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
> +{
> + unsigned int ret;
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> + uint16_t type;
> +
> + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
> + pkt = (struct my_magic_packet *)eth;
> + type = pkt->type;
> +
> + if (likely(type == TRACE_TYPE_DATA))
> + clock_gettime(CLOCK_MONOTONIC, &last_sent);
> + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
> + if (unlikely(ret < 1))
> + rte_pktmbuf_free(*m);
> + return 0;
> +}
> +
> +static inline void
> +l2reflect_simple_forward(struct rte_mbuf *m)
> +{
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> +
> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> + pkt = (struct my_magic_packet *)eth;
> +
> + /* dst addr */
> + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
> + /* src addr */
> + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
> +
> + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
> + rte_eal_trace_generic_str("sending traced packet");
> +
> + l2reflect_send_packet(&m, l2reflect_port_number);
> +}
> +
> +static struct rte_mbuf *
> +l2reflect_new_pkt(unsigned int type)
> +{
> + struct rte_mbuf *m;
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> + uint64_t frame_bytes = RTE_ETHER_MIN_LEN;
> +
> + m = rte_pktmbuf_alloc(l2reflect_pktmbuf_pool);
> + if (m == NULL)
> + rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc failed\n");
> +
> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> +
> + if (type == TRACE_TYPE_DATA)
> + frame_bytes = l2reflect_pkt_bytes;
> +
> + /* zero out packet to make dumps better readable */
> + memset(eth, 0, frame_bytes - RTE_ETHER_CRC_LEN);
> +
> + if (type == TRACE_TYPE_HELO)
> + rte_ether_addr_copy(ðer_bcast_addr, ð->dst_addr);
> + else
> + rte_ether_addr_copy(&l2reflect_remote_eth_addr, ð->dst_addr);
> +
> + /* src addr */
> + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
> + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT);
> +
> + m->data_len = frame_bytes - RTE_ETHER_CRC_LEN;
> + m->pkt_len = frame_bytes - RTE_ETHER_CRC_LEN;
> +
> + pkt = (struct my_magic_packet *)eth;
> + pkt->type = type;
> + pkt->breakval = l2reflect_break_usec;
> + pkt->req_pkt_bytes = l2reflect_pkt_bytes;
> +
> + return m;
> +}
> +
> +static void
> +l2reflect_send_reset(void)
> +{
> + struct rte_mbuf *m;
> + m = l2reflect_new_pkt(TRACE_TYPE_RSET);
> + l2reflect_send_packet(&m, l2reflect_port_number);
> +}
> +
> +static void
> +l2reflect_send_quit(void)
> +{
> + struct rte_mbuf *m;
> + m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
> + l2reflect_send_packet(&m, l2reflect_port_number);
> +}
> +
> +static void
> +l2reflect_new_ball(void)
> +{
> + struct rte_mbuf *pnewball;
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> + char mac_src_str[RTE_ETHER_ADDR_FMT_SIZE];
> + char mac_dst_str[RTE_ETHER_ADDR_FMT_SIZE];
> +
> + RTE_LOG(INFO, L2REFLECT, "Should create a packet to play with ...\n");
> + pnewball = l2reflect_new_pkt(TRACE_TYPE_DATA);
> +
> + eth = rte_pktmbuf_mtod(pnewball, struct rte_ether_hdr *);
> +
> + rte_ether_format_addr(mac_src_str, sizeof(mac_src_str), &l2reflect_port_eth_addr);
> + rte_ether_format_addr(mac_dst_str, sizeof(mac_dst_str), &l2reflect_remote_eth_addr);
> + RTE_LOG(INFO, L2REFLECT, "from MAC address: %s to %s\n\n", mac_src_str, mac_dst_str);
> +
> + pkt = (struct my_magic_packet *)eth;
> +
> + /* we are tracing lets tell the others */
> + if (l2reflect_break_usec)
> + pkt->magic = MAGIC_TRACE_PAYLOAD;
> +
> + l2reflect_send_packet(&pnewball, l2reflect_port_number);
> +}
> +
> +static inline int64_t
> +calcdiff_ns(struct timespec t1, struct timespec t2)
> +{
> + int64_t diff;
> + diff = NSEC_PER_SEC * (int64_t)((int)t1.tv_sec - (int)t2.tv_sec);
> + diff += ((int)t1.tv_nsec - (int)t2.tv_nsec);
> + return diff;
> +}
> +
> +/* filter the received packets for actual l2reflect messages */
> +static inline unsigned int
> +l2reflect_rx_filter(
> + struct rte_mbuf *buf)
> +{
> + struct rte_ether_hdr *eth;
> + eth = rte_pktmbuf_mtod(buf, struct rte_ether_hdr *);
> +
> + if (unlikely(buf->nb_segs > 1))
> + RTE_LOG(WARNING, L2REFLECT, "Segmented packet: data-len: %i, pkt-len: %i, #seg: %i\n",
> + buf->data_len, buf->pkt_len, buf->nb_segs);
> +
> + /* check for the l2reflect ether type */
> + if (unlikely(eth->ether_type != rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT)))
> + return 0;
> +
> + /*
> + * if the packet is not from our partner
> + * (and we already have a partner), drop it
> + */
> + if (unlikely(l2reflect_state != S_ELECT_LEADER &&
> + !rte_is_same_ether_addr(ð->src_addr, &l2reflect_remote_eth_addr)))
> + return 0;
> +
> + /* filter bounce-back packets */
> + if (unlikely(rte_is_same_ether_addr(ð->src_addr, &l2reflect_port_eth_addr)))
> + return 0;
> +
> + return 1;
> +}
> +
> +/*
> + * automatically elect the leader of the benchmark by
> + * sending out HELO packets and waiting for responses.
> + * On response, the mac addresses are compared and the
> + * numerically larger one becomes the leader.
> + */
> +static int
> +elect_leader(uint16_t portid)
> +{
> + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> + struct rte_mbuf *m;
> + struct rte_ether_hdr *eth;
> + struct rte_ether_addr *src_addr;
> + struct rte_eth_dev_info dev_info;
> + struct my_magic_packet *pkt;
> + unsigned int i, nb_rx;
> + int ehlo_send = 0;
> + int i_win;
> +
> + while (l2reflect_state == S_ELECT_LEADER) {
> + /* send a packet to make sure the MAC addr of this interface is publicly known */
> + m = l2reflect_new_pkt(TRACE_TYPE_HELO);
> + RTE_LOG(INFO, L2REFLECT, "looking for player HELO\n");
> + l2reflect_send_packet(&m, l2reflect_port_number);
> + rte_delay_ms(DELAY_ELECTION_MS);
> +
> + /* receive election packets */
> + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
> + MAX_PKT_BURST);
> +
> + /* note: do not short-circuit as otherwise the mbufs are not freed */
> + for (i = 0; i < nb_rx; i++) {
> + m = pkts_burst[i];
> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> + src_addr = ð->src_addr;
> + pkt = (struct my_magic_packet *)eth;
> +
> + if (!l2reflect_rx_filter(m)) {
> + rte_pktmbuf_free(m);
> + continue;
> + }
> +
> + if (pkt->type == TRACE_TYPE_EHLO && l2reflect_state == S_ELECT_LEADER) {
> + /* check if both modes are equal */
> + if (((l2reflect_pkt_bytes <= RTE_ETHER_MTU)
> + != (pkt->req_pkt_bytes <= RTE_ETHER_MTU))) {
> + l2reflect_state = S_LOCAL_TERM;
> + m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
> + l2reflect_send_packet(&m, l2reflect_port_number);
> + rte_exit(EXIT_FAILURE,
> + "remote and local jumbo config does not match "
> + "(%" PRIu64 " vs %" PRIu64 ")\n",
> + l2reflect_pkt_bytes, pkt->req_pkt_bytes);
> + }
> + if (l2reflect_pkt_bytes != pkt->req_pkt_bytes) {
> + l2reflect_pkt_bytes = MAX(l2reflect_pkt_bytes,
> + pkt->req_pkt_bytes);
> + rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
> + const uint32_t overhead_len = eth_dev_get_overhead_len(
> + dev_info.max_rx_pktlen,
> + dev_info.max_mtu);
> + const uint16_t mtu = MAX(l2reflect_pkt_bytes - overhead_len,
> + dev_info.min_mtu);
> + RTE_LOG(INFO, L2REFLECT,
> + "update frame sizes: frame: %" PRIu64 ", MTU %d\n",
> + l2reflect_pkt_bytes, mtu);
> + const int ret = rte_eth_dev_set_mtu(
> + l2reflect_port_number,
> + mtu);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE, "failed to update MTU: %s\n",
> + strerror(-ret));
> + }
> +
> + if (ehlo_send) {
> + l2reflect_state = S_RUNNING;
> + RTE_LOG(INFO, L2REFLECT, "Enter running state\n");
> + }
> + }
> + /* we got a HELO packet, respond with EHLO */
> + if (pkt->type == TRACE_TYPE_HELO) {
> + char mac_str_other[RTE_ETHER_ADDR_FMT_SIZE];
> + rte_ether_addr_copy(src_addr, &l2reflect_remote_eth_addr);
> + m = l2reflect_new_pkt(TRACE_TYPE_EHLO);
> + rte_ether_format_addr(
> + mac_str_other, sizeof(mac_str_other), &l2reflect_remote_eth_addr);
> + RTE_LOG(INFO, L2REFLECT, "found one HELO from %s\n", mac_str_other);
> + l2reflect_send_packet(&m, l2reflect_port_number);
> + ehlo_send = 1;
> + }
> + rte_pktmbuf_free(m);
The loop content could be moved in a dedicated function, it would
improve the readability.
> + }
> + }
> +
> + if (rte_is_same_ether_addr(&l2reflect_port_eth_addr, &l2reflect_remote_eth_addr))
> + rte_exit(EXIT_FAILURE, "talking to myself ... confused\n");
> +
> + /* the one with the bigger MAC is the leader */
> + i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) & MAC_ADDR_CMP) >
> + (*((uint64_t *)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));
> +
> + RTE_LOG(INFO, L2REFLECT, "i am the \"%s\"\n", i_win ? "rick" : "morty");
> +
> + return i_win;
> +}
> +
> +/*
> + * add the measured time diff to the statistics.
> + * return false if threshold is hit
> + */
> +static inline int
> +add_to_record(const uint64_t diff)
> +{
> + record.rounds++;
> + /* do not count the first rounds, diff would be too high */
> + if (record.rounds < WARMUP_ROUNDS)
> + return 1;
> +
> + if (l2reflect_hist) {
> + const uint64_t bucket =
> + MIN(diff / (hist_bucket_usec * 1000), l2reflect_hist_buckets-1);
> + record.hist[bucket]++;
> + }
> +
> + record.avg_round_ns += (double)diff;
> + if (diff < record.min_round_ns)
> + record.min_round_ns = diff;
> + if (diff > record.max_round_ns) {
> + record.max_round_ns = diff;
> + if (l2reflect_break_usec &&
> + (record.max_round_ns > (l2reflect_break_usec * 1000)))
> + return 0;
> + }
> + return 1;
> +}
> +
> +/*
> + * process a single packet.
> + * return false if latency threshold is hit
> + */
> +static inline int
> +process_packet(
> + struct my_magic_packet *pkt,
> + struct timespec *rx_time,
> + uint64_t *diff)
> +{
> + if (pkt->type == TRACE_TYPE_DATA) {
> + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
> + *diff = calcdiff_ns(last_recv, last_sent);
> + if (!unlikely(add_to_record(*diff))) {
> + /* TODO: improve tracing */
> + rte_eal_trace_generic_u64(record.max_round_ns / 1000);
> + return 0;
> + }
> + }
> + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
> + rte_eal_trace_generic_str("received traced packet");
> +
> + return 1;
> +}
> +
> +/*
> + * free all packet buffers in the range [begin, end[.
> + */
> +static void
> +free_pktbufs(
> + struct rte_mbuf **bufs,
> + int begin,
> + int end)
> +{
> + int i = begin;
> + for (; i < end; i++)
> + rte_pktmbuf_free(bufs[0]);
rte_pktmbuf_free(bufs[i]) ?
> +}
> +
> +/*
> + * return 1 in case the ball was lost (cheap check)
> + */
> +static inline void
> +check_ball_lost(const uint64_t dp_idle) {
> + /* only check if we are in running state and have a breakval */
> + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
> + l2reflect_state == S_RUNNING &&
> + l2reflect_break_usec &&
> + record.rounds > WARMUP_ROUNDS) {
> + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64 " rounds\n", record.rounds);
> + l2reflect_state = S_LOCAL_TERM;
> + }
> +}
> +
> +/* main processing loop */
> +static void
> +l2reflect_main_loop(void)
> +{
> + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> + struct rte_mbuf *m;
> + unsigned int lcore_id;
> + unsigned int j, nb_rx, nb_evt;
> + uint16_t portid;
> + /* number of consequent idle passes */
> + uint64_t dp_idle = 0;
> + uint64_t diff = 0;
> + int sender;
> + struct my_magic_packet *pkt;
> + struct rte_ether_hdr *eth;
> + struct rte_epoll_event event;
> +
> + lcore_id = rte_lcore_id();
> +
> + RTE_LOG(INFO, L2REFLECT, "entering main loop on lcore %u\n", lcore_id);
> +
> + portid = l2reflect_port_number;
> + RTE_LOG(INFO, L2REFLECT, " -- lcoreid=%u portid=%u\n", lcore_id,
> + portid);
> + assert_link_status(portid);
> +
> +restart:
> + init_record();
> + rte_eth_stats_reset(portid);
> + l2reflect_state = S_ELECT_LEADER;
> + sender = elect_leader(portid);
> +
> + if (l2reflect_break_usec)
> + rte_eal_trace_generic_str("hit breakval");
> +
> + /* the leader election implements a latch (half-barrier).
> + * To ensure that the other party is in running state, we
> + * have to wait at least a full election period
> + */
> + rte_delay_ms(DELAY_ELECTION_MS * 2);
> +
> + /* we are the sender so we bring one ball into the game */
> + if (sender)
> + l2reflect_new_ball();
> +
> + /* reset the record */
> + init_record();
> + while (l2reflect_state == S_RUNNING) {
> + struct timespec rx_time;
> +
> + if (l2reflect_interrupt) {
> + rte_eth_dev_rx_intr_enable(portid, l2reflect_q);
> + /* wait for interrupt or timeout */
> + nb_evt = rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1,
> + l2reflect_sleep_msec);
> + rte_eth_dev_rx_intr_disable(portid, l2reflect_q);
> + if (nb_evt == 0 && rounds > WARMUP_ROUNDS)
> + ++record.timeouts;
> + }
> +
> + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
> + MAX_PKT_BURST);
> +
> + if (nb_rx) {
> + clock_gettime(CLOCK_MONOTONIC, &rx_time);
> + dp_idle = 0;
> + } else
> + ++dp_idle;
> +
> + for (j = 0; j < nb_rx; j++) {
> + m = pkts_burst[j];
> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> + pkt = (struct my_magic_packet *)eth;
> +
> + rte_prefetch0(eth);
> +
> + if (unlikely(!l2reflect_rx_filter(m))) {
> + rte_pktmbuf_free(m);
> + continue;
> + }
> +
> + /* remote is telling us to reset or stop */
> + if (unlikely(pkt->type == TRACE_TYPE_RSET)) {
> + free_pktbufs(pkts_burst, j, nb_rx);
> + goto restart;
> + }
> + if (unlikely(pkt->type == TRACE_TYPE_QUIT)) {
> + l2reflect_state = S_REMOTE_TERM;
> + free_pktbufs(pkts_burst, j, nb_rx);
> + break;
> + }
> +
> + if (likely(l2reflect_state == S_RUNNING)) {
> + if (unlikely(!process_packet(pkt, &rx_time, &diff))) {
> + l2reflect_state = S_LOCAL_TERM;
> + free_pktbufs(pkts_burst, j, nb_rx);
> + break;
> + }
> + l2reflect_simple_forward(m);
> + }
> + }
> + check_ball_lost(dp_idle);
> + }
> +
> + const int state_cpy = l2reflect_state;
> + switch (state_cpy) {
> + case S_RESET_TRX:
> + l2reflect_send_reset();
> + l2reflect_state = S_ELECT_LEADER;
> + /* fallthrough */
> + case S_ELECT_LEADER:
> + goto restart;
> + }
> +
> + if (state_cpy == S_LOCAL_TERM) {
> + rte_eal_trace_generic_str("local termination");
> + l2reflect_send_quit();
> + } else if (state_cpy == S_REMOTE_TERM) {
> + RTE_LOG(INFO, L2REFLECT, "received message that remote hit threshold (or is cancelled)\n");
> + }
> +}
> +
> +static int
> +l2reflect_launch_one_lcore(__rte_unused void *dummy)
> +{
> + struct sched_param param;
> + int err;
> +
> + if (sleep_start) {
> + RTE_LOG(INFO, L2REFLECT, "Sleeping and waiting for SIGCONT\n");
> + while (sleep_start) {
> + rte_delay_ms(10);
> + if (l2reflect_state == S_LOCAL_TERM)
> + rte_exit(EXIT_SUCCESS, "Quit\n");
> + }
> + RTE_LOG(INFO, L2REFLECT, "Got SIGCONT, continuing");
> + }
> + if (l2reflect_mlock) {
> + err = mlockall(MCL_CURRENT | MCL_FUTURE);
> + if (err)
> + rte_exit(EXIT_FAILURE, "mlockall failed: %s\n",
> + strerror(errno));
> + }
> + if (priority > 0 || policy != SCHED_OTHER) {
> + memset(¶m, 0, sizeof(param));
> + param.sched_priority = priority;
> + err = sched_setscheduler(0, policy, ¶m);
> + if (err)
> + rte_exit(EXIT_FAILURE,
> + "sched_setscheduler failed: %s\n",
> + strerror(errno));
> + }
> + if (l2reflect_interrupt) {
> + err = rte_eth_dev_rx_intr_ctl_q(l2reflect_port_number,
> + l2reflect_q,
> + RTE_EPOLL_PER_THREAD,
> + RTE_INTR_EVENT_ADD, NULL);
> + if (err)
> + rte_exit(EXIT_FAILURE,
> + "could not register I/O interrupt\n");
> + }
> + l2reflect_main_loop();
> + return 0;
> +}
> +
> +static void
> +sig_handler(int signum)
> +{
> + switch (signum) {
> + case SIGUSR1:
> + if (l2reflect_state == S_RUNNING)
> + l2reflect_state = S_RESET_TRX;
> + break;
> + case SIGUSR2:
> + l2reflect_output_hist = 1;
> + break;
> + case SIGCONT:
> + sleep_start = 0;
> + break;
> + case SIGHUP:
> + case SIGINT:
> + l2reflect_state = S_LOCAL_TERM;
> + break;
> + }
> +}
> +
> +int
> +main(int argc, char **argv)
> +{
> + struct rte_eth_dev_info dev_info;
> + struct rte_eth_txconf txconf;
> + int ret;
> + uint32_t i;
> + uint16_t nb_ports;
> + unsigned int lcore_id;
> + struct sigaction action;
> + bzero(&action, sizeof(action));
> + char mempool_name[128];
> + char mac_str[RTE_ETHER_ADDR_FMT_SIZE];
> +
> + action.sa_handler = sig_handler;
> + if (sigaction(SIGHUP, &action, NULL) < 0 ||
> + sigaction(SIGUSR1, &action, NULL) < 0 ||
> + sigaction(SIGUSR2, &action, NULL) < 0 ||
> + sigaction(SIGCONT, &action, NULL) < 0 ||
> + sigaction(SIGINT, &action, NULL) < 0) {
> + rte_exit(EXIT_FAILURE, "Could not register signal handler\n");
> + }
> +
> + lcore_id = rte_lcore_id();
> +
> + if (check_opts_for_help(argc, argv, l2reflect_usage))
> + return 0;
> +
> + /* init EAL */
> + ret = rte_eal_init(argc, argv);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
> + argc -= ret;
> + argv += ret;
> +
> + /* parse application arguments (after the EAL ones) */
> + ret = l2reflect_parse_args(argc, argv);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE, "Invalid L2REFLECT arguments\n");
> +
> + snprintf(mempool_name, sizeof(mempool_name), "mbuf_pool_%d", getpid());
> + RTE_LOG(DEBUG, L2REFLECT, "About to create mempool \"%s\"\n", mempool_name);
> + /* create the mbuf pool */
> + l2reflect_pktmbuf_pool =
> + rte_pktmbuf_pool_create(mempool_name, NB_MBUF,
> + MAX_PKT_BURST, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
> + rte_socket_id());
> +
> + if (l2reflect_pktmbuf_pool == NULL)
> + rte_exit(EXIT_FAILURE,
> + "Cannot init/find mbuf pool name %s\nError: %d %s\n",
> + mempool_name, rte_errno, rte_strerror(rte_errno));
> +
> + nb_ports = rte_eth_dev_count_avail();
> + if (nb_ports == 0)
> + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
> + if (l2reflect_port_number + 1 > nb_ports)
> + rte_exit(EXIT_FAILURE, "Chosen port %d does not exist - bye\n",
> + l2reflect_port_number);
> + RTE_LOG(INFO, L2REFLECT, "We have %d ports and will use port %d\n", nb_ports,
> + l2reflect_port_number);
> +
> + rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
> + RTE_LOG(INFO, L2REFLECT, "Initializing port %u ...\n", l2reflect_port_number);
> + fflush(stdout);
> +
> + if (l2reflect_interrupt)
> + port_conf.intr_conf.rxq = 1;
> +
> + ret = config_port_max_pkt_len(&port_conf, &dev_info);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE,
> + "Invalid max packet length: %u (port %u)\n",
> + l2reflect_port_number, l2reflect_port_number);
> +
> + ret = rte_eth_dev_configure(l2reflect_port_number, 1, 1, &port_conf);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE,
> + "Cannot configure device: err=%s, port=%u\n",
> + strerror(-ret), l2reflect_port_number);
> +
> + ret = rte_eth_dev_adjust_nb_rx_tx_desc(l2reflect_port_number, &nb_rxd, &nb_txd);
> + if (ret != 0)
> + rte_exit(EXIT_FAILURE,
> + "Cannot adjust # of Rx/Tx descriptors to HW limits: err=%s, port=%u\n",
> + strerror(-ret), l2reflect_port_number);
> +
> + /* init RX queues */
> + for (i = 0; i <= l2reflect_q; i++) {
> + ret = rte_eth_rx_queue_setup(
> + l2reflect_port_number, i, nb_rxd,
> + rte_eth_dev_socket_id(l2reflect_port_number), NULL,
> + l2reflect_pktmbuf_pool);
> + if (ret < 0)
> + rte_exit(
> + EXIT_FAILURE,
> + "rte_eth_rx_queue_setup:err=%s, port=%u q=%u\n",
> + strerror(-ret), l2reflect_port_number, i);
> + }
> +
> + /* init one TX queue on each port */
> + txconf = dev_info.default_txconf;
> + txconf.offloads = port_conf.txmode.offloads;
> + ret = rte_eth_tx_queue_setup(
> + l2reflect_port_number, 0, nb_txd,
> + rte_eth_dev_socket_id(l2reflect_port_number), &txconf);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE,
> + "rte_eth_tx_queue_setup:err=%s, port=%u\n",
> + strerror(-ret), (unsigned int)l2reflect_port_number);
> +
> + /* Start device */
> + ret = rte_eth_dev_start(l2reflect_port_number);
> + if (ret < 0)
> + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%s, port=%u\n",
> + strerror(-ret), (unsigned int)l2reflect_port_number);
> +
> + rte_eth_macaddr_get(l2reflect_port_number, &l2reflect_port_eth_addr);
> +
> + /*
> + * When running on a Tap device, we might want to use a foreign
> + * mac address to make sure that the application and the Tap device
> + * do not share the same MAC addr. By that, we enforce that the
> + * bridge learns this address and correctly forwards unicast packets.
> + */
> + if (l2reflect_fake_mac)
> + l2reflect_port_eth_addr.addr_bytes[5] += 1;
> +
> + rte_ether_format_addr(mac_str, sizeof(mac_str),
> + &l2reflect_port_eth_addr);
> + RTE_LOG(INFO, L2REFLECT, "Port %u, MAC address: %s\n\n",
> + (unsigned int)l2reflect_port_number, mac_str);
> +
> + /*
> + * in quiet mode the primary executes the main packet loop
> + * otherwise the one worker does it and the primary prints stats
> + */
> + if (quiet) {
> + assert(rte_lcore_count() == 1);
> +#ifdef HAS_SYS_IO
> + if (disable_int) {
> + iopl(3);
> + asm("cli");
> + }
> +#endif
> + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n", getpid(), getppid());
> + l2reflect_launch_one_lcore(NULL);
> + } else {
> + assert(rte_lcore_count() == 2);
> + /* the worker reflects the packets */
> + RTE_LCORE_FOREACH_WORKER(lcore_id)
> + {
> + rte_eal_remote_launch(l2reflect_launch_one_lcore, NULL,
> + lcore_id);
> + }
> +
> + /* the primary prints the stats */
> + init_record();
> + l2reflect_stats_loop();
> + rte_eal_mp_wait_lcore();
> + }
> + rte_eal_cleanup();
> +
> + if (l2reflect_hist)
> + output_histogram_snapshot();
> +
> + cleanup_record();
> +
> + return 0;
> +}
> diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
> new file mode 100644
> index 0000000000..14b154ef06
> --- /dev/null
> +++ b/app/l2reflect/meson.build
> @@ -0,0 +1,21 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 Siemens AG
> +
> +cc = meson.get_compiler('c')
> +
> +jansson = dependency('libjansson', required: false)
> +
> +if not jansson.found()
> + jansson = cc.find_library('jansson', required: false)
> +endif
> +
> +if cc.has_header('sys/io.h')
> + cflags += '-DHAS_SYS_IO'
> +endif
> +
> +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
> +deps += ['ethdev']
I think some dependecies are missing:
ring, mbuf, mempool
> +if jansson.found()
> + ext_deps += jansson
> + cflags += '-DRTE_HAS_JANSSON'
> +endif
> diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
> new file mode 100644
> index 0000000000..c1fae5d5e4
> --- /dev/null
> +++ b/app/l2reflect/payload.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Siemens AG
> + */
> +#include <rte_ether.h>
> +
> +#ifndef _L2REFLECT_PAYLOAD_H_
> +#define _L2REFLECT_PAYLOAD_H_
> +
> +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
> +/* IEEE Std 802 - Local Experimental Ethertype */
> +#define ETHER_TYPE_L2REFLECT 0x88B5
> +
> +struct my_magic_packet {
> + /* l2 packet header */
> + struct rte_ether_hdr eth;
> + /* type of the l2reflect packet */
> + uint8_t type;
> + /* magic easy-to-spot pattern for tracing */
> + uint64_t magic;
> + /* break if latency is larger than this */
> + uint64_t breakval;
> + /* intended size of the packet */
> + uint64_t req_pkt_bytes;
> +};
> +
> +#endif /* _L2REFLECT_PAYLOAD_H_ */
> diff --git a/app/l2reflect/stats.c b/app/l2reflect/stats.c
> new file mode 100644
> index 0000000000..6bcbb7a2bf
> --- /dev/null
> +++ b/app/l2reflect/stats.c
> @@ -0,0 +1,225 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +#include <time.h>
> +#include <string.h>
> +#ifdef RTE_HAS_JANSSON
> +#include <jansson.h>
> +#endif
> +#include "colors.h"
> +#include "stats.h"
> +
> +unsigned int hist_bucket_usec;
> +struct stats record;
> +char *hist_filename;
> +
> +void
> +init_record(void)
> +{
> + record.max_round_ns = 0;
> + record.min_round_ns = MIN_INITIAL;
> + record.rounds = 0;
> + record.timeouts = 0;
> + record.avg_round_ns = 0;
> + if (l2reflect_hist) {
> + if (!record.hist_size) {
> + record.hist =
> + calloc(l2reflect_hist_buckets, sizeof(uint64_t));
> + record.hist_size = l2reflect_hist_buckets;
> + } else {
> + memset(record.hist, 0,
> + record.hist_size * sizeof(uint64_t));
> + }
> + }
> + clock_gettime(CLOCK_MONOTONIC, &record.time_start);
> +}
> +
> +void
> +cleanup_record(void)
> +{
> + if (l2reflect_hist) {
> + free(record.hist);
> + record.hist = NULL;
> + record.hist_size = 0;
> + }
> +}
> +
> +void
> +output_histogram_snapshot(void)
> +{
> + char *json = serialize_histogram(&record);
> + FILE *fd = stderr;
> + if (hist_filename)
> + fd = fopen(hist_filename, "w");
> + fputs(json, fd);
> + fputs("\n", fd);
> + free(json);
> + if (hist_filename)
> + fclose(fd);
> +}
> +
> +void
> +print_stats(void)
> +{
> + const char clr[] = { 27, '[', '2', 'J', '\0' };
> + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
> + const uint64_t bytes_in_gib = 0x40000000;
> + struct rte_eth_stats stats;
> + char mac_str_me[RTE_ETHER_ADDR_FMT_SIZE];
> + char mac_str_remote[RTE_ETHER_ADDR_FMT_SIZE];
> + char timeout_bound_str[32];
> + const char *str_jumbo = l2reflect_pkt_bytes > RTE_ETHER_MTU ?
> + "jumbo" : "no-jumbo";
> + struct timespec time_now;
> +
> + if (l2reflect_interrupt == 0)
> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), " polling");
> + else if (l2reflect_sleep_msec == -1u)
> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">= infinite");
> + else
> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">%9" PRIu64 "ms",
> + l2reflect_sleep_msec);
> +
> + rte_eth_stats_get(l2reflect_port_number, &stats);
> + rte_ether_format_addr(mac_str_me, sizeof(mac_str_me),
> + &l2reflect_port_eth_addr);
> + rte_ether_format_addr(mac_str_remote, sizeof(mac_str_remote),
> + &l2reflect_remote_eth_addr);
> +
> + clock_gettime(CLOCK_MONOTONIC, &time_now);
> + const uint64_t time_since_start = time_now.tv_sec - record.time_start.tv_sec;
> +
> + /* Clear screen and move to top left */
> + printf("%s%s", clr, topLeft);
> +
> + printf("%sNetworking Roundtrip Test%s\n", colors->green, colors->reset);
> + printf("\n%sPort statistics ====================================%s",
> + colors->magenta, colors->reset);
> +
> + printf("\nMe: %s <--> Remote: %s", mac_str_me, mac_str_remote);
> + printf("\nStatistics for port %d PID %d on lcore %d ---------"
> + "\nState: %-16s %10" PRIu64 " s"
> + "\nPackets tx: %22" PRIu64 "\nPackets rx: %22" PRIu64
> + "\nBytes tx: %24" PRIu64 " (%8.2f GiB)"
> + "\nBytes rx: %24" PRIu64 " (%8.2f GiB)"
> + "\nErrors tx: %23" PRIu64 "\nErrors rx: %23" PRIu64
> + "\nTimeouts rx: %21" PRIu64 " (%s)"
> + "\nPacketsize [Byte]: %15" PRIu64 " (%12s)",
> + l2reflect_port_number, getpid(), rte_lcore_id(),
> + runstate_tostring(l2reflect_state),
> + time_since_start,
> + stats.opackets, stats.ipackets, stats.obytes,
> + (double)stats.obytes / bytes_in_gib, stats.ibytes,
> + (double)stats.ibytes / bytes_in_gib, stats.oerrors,
> + stats.ierrors, record.timeouts,
> + timeout_bound_str, l2reflect_pkt_bytes, str_jumbo);
> + printf("\n%sPort timing statistics =============================%s",
> + colors->magenta, colors->reset);
> + if (l2reflect_state == S_ELECT_LEADER ||
> + record.min_round_ns == MIN_INITIAL) {
> + printf("\n\nBenchmark not started yet...\n");
> + } else {
> + printf("\n%sMax%s roundtrip: %19" PRIu64 " us", colors->red,
> + colors->reset, record.max_round_ns / 1000);
> + printf("\n%sAvg%s roundtrip: %19" PRIu64 " us", colors->yellow,
> + colors->reset,
> + record.rounds ? (uint64_t)(record.avg_round_ns /
> + record.rounds / 1000) :
> + 0);
> + printf("\n%sMin%s roundtrip: %19" PRIu64 " us", colors->green,
> + colors->reset, record.min_round_ns / 1000);
> + }
> + printf("\n%s====================================================%s\n",
> + colors->magenta, colors->reset);
> +}
> +
> +void
> +l2reflect_stats_loop(void)
> +{
> + while (!(l2reflect_state & (S_LOCAL_TERM | S_REMOTE_TERM))) {
> + print_stats();
> + if (l2reflect_hist && l2reflect_output_hist) {
> + output_histogram_snapshot();
> + l2reflect_output_hist = 0;
> + }
> + rte_delay_us_sleep(1000000);
> + }
> +}
> +
> +char *
> +serialize_histogram(__rte_unused const struct stats *record)
> +{
> +#ifndef RTE_HAS_JANSSON
> + return strdup("to print histogram, build with jansson support");
> +#else
> + char *str = NULL;
> + char key[8];
> + unsigned int i;
> + json_t *hist0, *cpu0, *all_cpus, *output;
> +
> + output = json_object();
> + /* version: 1 */
> + json_object_set_new(output, "version", json_integer(1));
> +
> + /* cpu 0 histogram */
> + hist0 = json_object();
> + for (i = 0; i < record->hist_size; ++i) {
> + /* only log positive numbers to meet jitterplot format */
> + if (record->hist[i] != 0) {
> + snprintf(key, 8, "%u", i * hist_bucket_usec);
> + json_object_set(hist0, key,
> + json_integer(record->hist[i]));
> + }
> + }
> +
> + /* in case of empty histogram, set these values to zero */
> + const json_int_t min_round_us =
> + record->rounds ? record->min_round_ns / 1000 : 0;
> + const json_int_t avg_round_us =
> + record->rounds ? record->avg_round_ns / record->rounds / 1000 : 0;
> + const json_int_t max_round_us =
> + record->rounds ? record->max_round_ns / 1000 : 0;
> +
> + /* cpu 0 stats */
> + cpu0 = json_object();
> + json_object_set_new(cpu0, "histogram", hist0);
> + json_object_set_new(cpu0, "count", json_integer(record->rounds));
> + json_object_set_new(cpu0, "min", json_integer(min_round_us));
> + json_object_set_new(cpu0, "max", json_integer(max_round_us));
> + json_object_set_new(cpu0, "avg", json_integer(avg_round_us));
> +
> + /* combine objects */
> + all_cpus = json_object();
> + json_object_set_new(all_cpus, "0", cpu0);
> + json_object_set_new(output, "cpu", all_cpus);
> +
> + str = json_dumps(output, JSON_ENSURE_ASCII | JSON_INDENT(2));
> +
> + /* cleanup */
> + json_decref(output);
> +
> + return str;
> +#endif
> +}
> +
> +const char *
> +runstate_tostring(int s)
> +{
> + switch (s) {
> + case S_ELECT_LEADER:
> + return "Electing";
> + case S_RESET_TRX:
> + return "Resetting";
> + case S_RUNNING:
> + return "Running";
> + case S_LOCAL_TERM:
> + return "Term. local";
> + case S_REMOTE_TERM:
> + return "Term. remote";
> + default:
> + return "Preparing";
> + }
> +}
> diff --git a/app/l2reflect/stats.h b/app/l2reflect/stats.h
> new file mode 100644
> index 0000000000..7f3dd9fffb
> --- /dev/null
> +++ b/app/l2reflect/stats.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +#ifndef _L2REFLECT_STATS_H_
> +#define _L2REFLECT_STATS_H_
> +#include <stdint.h>
> +#include <stdatomic.h>
> +#include <limits.h>
> +
> +#include <rte_ethdev.h>
> +
> +#include "l2reflect.h"
> +
> +#define MIN_INITIAL ULONG_MAX
> +#define HIST_NUM_BUCKETS_DEFAULT 128
> +
> +/* runtime statistics */
> +struct stats {
> + uint64_t max_round_ns;
> + uint64_t min_round_ns;
> + uint64_t rounds;
> + uint64_t timeouts;
> + double avg_round_ns;
> + unsigned int hist_size;
> + /* each slot is 10us */
> + uint64_t *hist;
> + struct timespec time_start;
> +};
> +
> +/* size of each histogram bucket in usec */
> +extern unsigned int hist_bucket_usec;
> +extern struct stats record;
> +extern char *hist_filename;
> +
> +void
> +init_record(void);
> +void
> +cleanup_record(void);
> +
> +void
> +l2reflect_stats_loop(void);
> +
> +/*
> + * Write the histogram to file / stdio without any locking.
> + * When called during the measurement, values are approximations
> + * (racy reads).
> + */
> +void
> +output_histogram_snapshot(void);
> +
> +/* Print out statistics on packets dropped */
> +void
> +print_stats(void);
> +
> +/*
> + * get a JSON representation of the record
> + */
> +char *
> +serialize_histogram(const struct stats *record);
> +
> +/*
> + * get a string representation of the current runstate
> + */
> +const char *
> +runstate_tostring(int s);
> +
> +#endif /* _L2REFLECT_STATS_H_ */
> diff --git a/app/l2reflect/utils.c b/app/l2reflect/utils.c
> new file mode 100644
> index 0000000000..4116b986d2
> --- /dev/null
> +++ b/app/l2reflect/utils.c
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2021 Siemens AG
> + */
> +
> +#include <rte_ethdev.h>
> +#include <rte_log.h>
> +#include <rte_ethdev.h>
> +
> +#include "utils.h"
> +#include "l2reflect.h"
> +
> +void
> +assert_link_status(int port_id)
> +{
> + struct rte_eth_link link;
> + uint8_t rep_cnt = MAX_REPEAT_TIMES;
> + int link_get_err = -EINVAL;
> +
> + memset(&link, 0, sizeof(link));
> + do {
> + link_get_err = rte_eth_link_get_nowait(port_id, &link);
> + if (link_get_err == 0 && link.link_status == RTE_ETH_LINK_UP)
> + break;
> + rte_delay_ms(CHECK_INTERVAL);
> + RTE_LOG(INFO, L2REFLECT, "Link not ready yet, try again...\n");
> + } while (--rep_cnt && (l2reflect_state != S_LOCAL_TERM));
> +
> + if (link_get_err < 0)
> + rte_exit(EXIT_FAILURE, "error: link get is failing: %s\n",
> + rte_strerror(-link_get_err));
> + if (link.link_status == RTE_ETH_LINK_DOWN)
> + rte_exit(EXIT_FAILURE, "error: link is still down\n");
> +
> + const char *linkspeed_str = rte_eth_link_speed_to_str(link.link_speed);
> + RTE_LOG(INFO, L2REFLECT,
> + "Link status on port %d: speed: %s, duplex: %s\n",
> + port_id, linkspeed_str,
> + link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX ? "full" : "half");
> +}
> +
> +uint32_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> + uint32_t overhead_len;
> + if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> + overhead_len = max_rx_pktlen - max_mtu;
> + else
> + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> + return overhead_len;
> +}
> +
> +int
> +config_port_max_pkt_len(struct rte_eth_conf *conf,
> + struct rte_eth_dev_info *dev_info)
> +{
> + uint32_t overhead_len;
> + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
> + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN)
> + return -1;
> + overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
> + dev_info->max_mtu);
> + conf->rxmode.mtu = MAX(l2reflect_pkt_bytes - overhead_len,
> + dev_info->min_mtu);
> + if (conf->rxmode.mtu > RTE_ETHER_MTU)
> + conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
> + return 0;
> +}
> diff --git a/app/l2reflect/utils.h b/app/l2reflect/utils.h
> new file mode 100644
> index 0000000000..177ad8cda6
> --- /dev/null
> +++ b/app/l2reflect/utils.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2021 Siemens AG
> + */
> +
> +#ifndef _L2REFLECT_UTILS_H_
> +#define _L2REFLECT_UTILS_H_
> +
> +#define MAX_REPEAT_TIMES 30
> +#define CHECK_INTERVAL 2000
> +
> +void assert_link_status(int port_id);
> +
> +uint32_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu);
> +
> +int
> +config_port_max_pkt_len(struct rte_eth_conf *conf,
> + struct rte_eth_dev_info *dev_info);
> +
> +#endif /* _L2REFLECT_UTILS_H_ */
> diff --git a/app/meson.build b/app/meson.build
> index 0ea04cadeb..3593322ab2 100644
> --- a/app/meson.build
> +++ b/app/meson.build
> @@ -3,6 +3,7 @@
>
> apps = [
> 'dumpcap',
> + 'l2reflect',
> 'pdump',
> 'proc-info',
> 'test-acl',
On Fri, Sep 2, 2022 at 2:16 PM Felix Moessbauer
<felix.moessbauer@siemens.com> wrote:
>
> The l2reflect application implements a ping-pong benchmark to
> measure the latency between two instances. For communication,
> we use raw ethernet and send one packet at a time. The timing data
> is collected locally and min/max/avg values are displayed in a TUI.
> Finally, a histogram of the latencies is printed which can be
One highlevel comment,
IMO, We don't need to add code for capturing in histogram and analyze
it in C code.
We can simply use, rte_trace and so that we can visualize with
standard trace viewers.
Also add add python based script parse the CTF emitted by trace and find
min/max/avg values to simply the code.
> further processed with the jitterdebugger visualization scripts.
> To debug latency spikes, a max threshold can be defined.
> If it is hit, a trace point is created on both instances.
>
> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> Signed-off-by: Henning Schild <henning.schild@siemens.com>
> ---
> app/l2reflect/colors.c | 34 ++
> app/l2reflect/colors.h | 19 +
> app/l2reflect/l2reflect.h | 53 ++
> app/l2reflect/main.c | 1007 +++++++++++++++++++++++++++++++++++++
> app/l2reflect/meson.build | 21 +
> app/l2reflect/payload.h | 26 +
> app/l2reflect/stats.c | 225 +++++++++
> app/l2reflect/stats.h | 67 +++
> app/l2reflect/utils.c | 67 +++
> app/l2reflect/utils.h | 20 +
> app/meson.build | 1 +
Need to add a doc for this example at doc/guides/sample_app_ug/
>
> diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
> new file mode 100644
> index 0000000000..af881d8788
> --- /dev/null
> +++ b/app/l2reflect/colors.c
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
2022. Please fix in all files.
> diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
> new file mode 100644
> index 0000000000..922bd7c281
> --- /dev/null
> +++ b/app/l2reflect/l2reflect.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Siemens AG
> + */
> +
> +#include <stdatomic.h>
> +
> +#ifndef _L2REFLECT_L2REFLECT_H_
> +#define _L2REFLECT_L2REFLECT_H_
> +
> +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
Don't define RTE_ symbols in application.
> +
> +/* max size that common 1G NICs support */
> +#define MAX_JUMBO_PKT_LEN 9600
> +
> +/* Used to compare MAC addresses. */
> +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
> +
> +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
> +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
Use RTE_MAX and friends. Similar comments for other macros.
> +extern int l2reflect_hist;
> +extern unsigned int l2reflect_hist_buckets;
> +extern atomic_int l2reflect_output_hist;
> +extern int l2reflect_interrupt;
> +extern uint64_t l2reflect_sleep_msec;
> +extern uint64_t l2reflect_pkt_bytes;
> +extern uint16_t l2reflect_port_number;
> +extern atomic_int l2reflect_state;
> +extern struct rte_ether_addr l2reflect_port_eth_addr;
> +extern struct rte_ether_addr l2reflect_remote_eth_addr;
It is better to move all global variables to app_ctx kind of structure and
allocate from hugepage as some of them are used in fastpath and easy to track.
> +/* Configurable number of RX/TX ring descriptors */
> +#define RTE_TEST_RX_DESC_DEFAULT 128
> +#define RTE_TEST_TX_DESC_DEFAULT 128
Don't define RTE_ symbols in application.
> +/* Send a burst of one packet */
> +static inline int
> +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
> +{
> + unsigned int ret;
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> + uint16_t type;
> +
> + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
> + pkt = (struct my_magic_packet *)eth;
> + type = pkt->type;
> +
> + if (likely(type == TRACE_TYPE_DATA))
> + clock_gettime(CLOCK_MONOTONIC, &last_sent);
Use rte_rdtsc_precise() instead of system call in fastpath
> + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
> + if (unlikely(ret < 1))
> + rte_pktmbuf_free(*m);
> + return 0;
> +}
> +
> +static inline void
> +l2reflect_simple_forward(struct rte_mbuf *m)
> +{
> + struct rte_ether_hdr *eth;
> + struct my_magic_packet *pkt;
> +
> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> + pkt = (struct my_magic_packet *)eth;
> +
> + /* dst addr */
> + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
> + /* src addr */
> + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
> +
> + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
> + rte_eal_trace_generic_str("sending traced packet");
Create app_trace tracepoint specific to application with new type to
avoid emit the string.
> +
> + l2reflect_send_packet(&m, l2reflect_port_number);
> +}
> +
> +/*
> + * process a single packet.
> + * return false if latency threshold is hit
> + */
> +static inline int
> +process_packet(
> + struct my_magic_packet *pkt,
> + struct timespec *rx_time,
> + uint64_t *diff)
> +{
> + if (pkt->type == TRACE_TYPE_DATA) {
> + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
> + *diff = calcdiff_ns(last_recv, last_sent);
> + if (!unlikely(add_to_record(*diff))) {
> + /* TODO: improve tracing */
> + rte_eal_trace_generic_u64(record.max_round_ns / 1000);
Add a new trace point as an application specific item.
> + return 0;
> + }
> + }
> + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
> + rte_eal_trace_generic_str("received traced packet");
Add a new trace point as an application specific item.
> +
> + return 1;
> +}
> +
> +/*
> + * free all packet buffers in the range [begin, end[.
> + */
> +static void
> +free_pktbufs(
> + struct rte_mbuf **bufs,
> + int begin,
> + int end)
> +{
> + int i = begin;
> + for (; i < end; i++)
> + rte_pktmbuf_free(bufs[0]);
Freeing the same packet multiple times?
Use burst varint if possible.
> +}
> +
> +/*
> + * return 1 in case the ball was lost (cheap check)
> + */
> +static inline void
> +check_ball_lost(const uint64_t dp_idle) {
> + /* only check if we are in running state and have a breakval */
> + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
> + l2reflect_state == S_RUNNING &&
> + l2reflect_break_usec &&
> + record.rounds > WARMUP_ROUNDS) {
> + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64 " rounds\n", record.rounds);
> + l2reflect_state = S_LOCAL_TERM;
> + }
> +}
> +}
> +
> + /*
> + * in quiet mode the primary executes the main packet loop
> + * otherwise the one worker does it and the primary prints stats
> + */
> + if (quiet) {
> + assert(rte_lcore_count() == 1);
> +#ifdef HAS_SYS_IO
> + if (disable_int) {
> + iopl(3);
> + asm("cli");
Do we need x86 specific items in application?
> + }
> +#endif
> + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n", getpid(), getppid());
> + l2reflect_launch_one_lcore(NULL);
> + } else {
> + assert(rte_lcore_count() == 2);
> + /* the worker reflects the packets */
> + RTE_LCORE_FOREACH_WORKER(lcore_id)
> + {
> + rte_eal_remote_launch(l2reflect_launch_one_lcore, NULL,
> + lcore_id);
> + }
> +
> + /* the primary prints the stats */
> + init_record();
> + l2reflect_stats_loop();
> + rte_eal_mp_wait_lcore();
> + }
> + rte_eal_cleanup();
> +
> + if (l2reflect_hist)
> + output_histogram_snapshot();
> +
> + cleanup_record();
> +
> + return 0;
> +}
> diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
> new file mode 100644
> index 0000000000..14b154ef06
> --- /dev/null
> +++ b/app/l2reflect/meson.build
> @@ -0,0 +1,21 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2022 Siemens AG
> +
> +cc = meson.get_compiler('c')
> +
> +jansson = dependency('libjansson', required: false)
> +
> +if not jansson.found()
> + jansson = cc.find_library('jansson', required: false)
> +endif
> +
> +if cc.has_header('sys/io.h')
> + cflags += '-DHAS_SYS_IO'
> +endif
> +
> +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
> +deps += ['ethdev']
> +if jansson.found()
> + ext_deps += jansson
> + cflags += '-DRTE_HAS_JANSSON'
> +endif
> diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
> new file mode 100644
> index 0000000000..c1fae5d5e4
> --- /dev/null
> +++ b/app/l2reflect/payload.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Siemens AG
> + */
> +#include <rte_ether.h>
> +
> +#ifndef _L2REFLECT_PAYLOAD_H_
> +#define _L2REFLECT_PAYLOAD_H_
> +
> +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
> +/* IEEE Std 802 - Local Experimental Ethertype */
> +#define ETHER_TYPE_L2REFLECT 0x88B5
> +
> +struct my_magic_packet {
> + /* l2 packet header */
> + struct rte_ether_hdr eth;
> + /* type of the l2reflect packet */
> + uint8_t type;
> + /* magic easy-to-spot pattern for tracing */
> + uint64_t magic;
> + /* break if latency is larger than this */
> + uint64_t breakval;
> + /* intended size of the packet */
> + uint64_t req_pkt_bytes;
> +};
> +
> +#endif /* _L2REFLECT_PAYLOAD_H_ */
21/09/2022 16:42, Jerin Jacob:
> On Fri, Sep 2, 2022 at 2:16 PM Felix Moessbauer
> <felix.moessbauer@siemens.com> wrote:
> >
> > The l2reflect application implements a ping-pong benchmark to
> > measure the latency between two instances. For communication,
> > we use raw ethernet and send one packet at a time. The timing data
> > is collected locally and min/max/avg values are displayed in a TUI.
> > Finally, a histogram of the latencies is printed which can be
>
> One highlevel comment,
>
> IMO, We don't need to add code for capturing in histogram and analyze
> it in C code.
> We can simply use, rte_trace and so that we can visualize with
> standard trace viewers.
> Also add add python based script parse the CTF emitted by trace and find
> min/max/avg values to simply the code.
+1, let's keep it minimal and simple
Am Wed, 21 Sep 2022 20:12:21 +0530
schrieb Jerin Jacob <jerinjacobk@gmail.com>:
> On Fri, Sep 2, 2022 at 2:16 PM Felix Moessbauer
> <felix.moessbauer@siemens.com> wrote:
> >
> > The l2reflect application implements a ping-pong benchmark to
> > measure the latency between two instances. For communication,
> > we use raw ethernet and send one packet at a time. The timing data
> > is collected locally and min/max/avg values are displayed in a TUI.
> > Finally, a histogram of the latencies is printed which can be
>
> One highlevel comment,
>
> IMO, We don't need to add code for capturing in histogram and analyze
> it in C code.
> We can simply use, rte_trace and so that we can visualize with
> standard trace viewers.
> Also add add python based script parse the CTF emitted by trace and
> find min/max/avg values to simply the code.
We need some logic in the application to have a live view of
min/max/avg. More importantly for the break threshold where we stop the
measurement and the tracing ... so you find the root-cause of your
spike at the end of your trace.
And a trace is only so long until the ring wraps.
So tracing could be the input, but the processing should happen live.
To react on a max threshold, to show live data, to not miss anything
even when running for weeks.
Given that trace processing would need to be live, would you still
suggest to use traces as input and use python to process them? I guess
that could be done with another process and IPC but not sure it would
be nice or how much code it would save.
Henning
> > further processed with the jitterdebugger visualization scripts.
> > To debug latency spikes, a max threshold can be defined.
> > If it is hit, a trace point is created on both instances.
> >
> > Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> > Signed-off-by: Henning Schild <henning.schild@siemens.com>
> > ---
> > app/l2reflect/colors.c | 34 ++
> > app/l2reflect/colors.h | 19 +
> > app/l2reflect/l2reflect.h | 53 ++
> > app/l2reflect/main.c | 1007
> > +++++++++++++++++++++++++++++++++++++ app/l2reflect/meson.build |
> > 21 + app/l2reflect/payload.h | 26 +
> > app/l2reflect/stats.c | 225 +++++++++
> > app/l2reflect/stats.h | 67 +++
> > app/l2reflect/utils.c | 67 +++
> > app/l2reflect/utils.h | 20 +
> > app/meson.build | 1 +
>
> Need to add a doc for this example at doc/guides/sample_app_ug/
>
> >
> > diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
> > new file mode 100644
> > index 0000000000..af881d8788
> > --- /dev/null
> > +++ b/app/l2reflect/colors.c
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
>
> 2022. Please fix in all files.
>
> > diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
> > new file mode 100644
> > index 0000000000..922bd7c281
> > --- /dev/null
> > +++ b/app/l2reflect/l2reflect.h
> > @@ -0,0 +1,53 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +
> > +#include <stdatomic.h>
> > +
> > +#ifndef _L2REFLECT_L2REFLECT_H_
> > +#define _L2REFLECT_L2REFLECT_H_
> > +
> > +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
>
> Don't define RTE_ symbols in application.
>
> > +
> > +/* max size that common 1G NICs support */
> > +#define MAX_JUMBO_PKT_LEN 9600
> > +
> > +/* Used to compare MAC addresses. */
> > +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
> > +
> > +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
> > +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
>
>
> Use RTE_MAX and friends. Similar comments for other macros.
>
>
> > +extern int l2reflect_hist;
> > +extern unsigned int l2reflect_hist_buckets;
> > +extern atomic_int l2reflect_output_hist;
> > +extern int l2reflect_interrupt;
> > +extern uint64_t l2reflect_sleep_msec;
> > +extern uint64_t l2reflect_pkt_bytes;
> > +extern uint16_t l2reflect_port_number;
> > +extern atomic_int l2reflect_state;
> > +extern struct rte_ether_addr l2reflect_port_eth_addr;
> > +extern struct rte_ether_addr l2reflect_remote_eth_addr;
>
> It is better to move all global variables to app_ctx kind of
> structure and allocate from hugepage as some of them are used in
> fastpath and easy to track.
>
> > +/* Configurable number of RX/TX ring descriptors */
> > +#define RTE_TEST_RX_DESC_DEFAULT 128
> > +#define RTE_TEST_TX_DESC_DEFAULT 128
>
> Don't define RTE_ symbols in application.
>
> > +/* Send a burst of one packet */
> > +static inline int
> > +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
> > +{
> > + unsigned int ret;
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > + uint16_t type;
> > +
> > + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
> > + pkt = (struct my_magic_packet *)eth;
> > + type = pkt->type;
> > +
> > + if (likely(type == TRACE_TYPE_DATA))
> > + clock_gettime(CLOCK_MONOTONIC, &last_sent);
>
> Use rte_rdtsc_precise() instead of system call in fastpath
>
> > + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
> > + if (unlikely(ret < 1))
> > + rte_pktmbuf_free(*m);
> > + return 0;
> > +}
> > +
> > +static inline void
> > +l2reflect_simple_forward(struct rte_mbuf *m)
> > +{
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > +
> > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > + pkt = (struct my_magic_packet *)eth;
> > +
> > + /* dst addr */
> > + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
> > + /* src addr */
> > + rte_ether_addr_copy(&l2reflect_port_eth_addr,
> > ð->src_addr); +
> > + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
> > + rte_eal_trace_generic_str("sending traced packet");
> >
>
> Create app_trace tracepoint specific to application with new type to
> avoid emit the string.
>
>
> > +
> > + l2reflect_send_packet(&m, l2reflect_port_number);
> > +}
>
> > +
> > +/*
> > + * process a single packet.
> > + * return false if latency threshold is hit
> > + */
> > +static inline int
> > +process_packet(
> > + struct my_magic_packet *pkt,
> > + struct timespec *rx_time,
> > + uint64_t *diff)
> > +{
> > + if (pkt->type == TRACE_TYPE_DATA) {
> > + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
> > + *diff = calcdiff_ns(last_recv, last_sent);
> > + if (!unlikely(add_to_record(*diff))) {
> > + /* TODO: improve tracing */
> > +
> > rte_eal_trace_generic_u64(record.max_round_ns / 1000);
>
> Add a new trace point as an application specific item.
>
> > + return 0;
> > + }
> > + }
> > + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
> > + rte_eal_trace_generic_str("received traced
> > packet");
>
> Add a new trace point as an application specific item.
>
> > +
> > + return 1;
> > +}
> > +
> > +/*
> > + * free all packet buffers in the range [begin, end[.
> > + */
> > +static void
> > +free_pktbufs(
> > + struct rte_mbuf **bufs,
> > + int begin,
> > + int end)
> > +{
> > + int i = begin;
> > + for (; i < end; i++)
> > + rte_pktmbuf_free(bufs[0]);
>
> Freeing the same packet multiple times?
>
> Use burst varint if possible.
>
>
>
> > +}
> > +
> > +/*
> > + * return 1 in case the ball was lost (cheap check)
> > + */
> > +static inline void
> > +check_ball_lost(const uint64_t dp_idle) {
> > + /* only check if we are in running state and have a
> > breakval */
> > + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
> > + l2reflect_state == S_RUNNING &&
> > + l2reflect_break_usec &&
> > + record.rounds > WARMUP_ROUNDS) {
> > + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64
> > " rounds\n", record.rounds);
> > + l2reflect_state = S_LOCAL_TERM;
> > + }
> > +}
> > +}
> > +
> > + /*
> > + * in quiet mode the primary executes the main packet loop
> > + * otherwise the one worker does it and the primary prints
> > stats
> > + */
> > + if (quiet) {
> > + assert(rte_lcore_count() == 1);
> > +#ifdef HAS_SYS_IO
> > + if (disable_int) {
> > + iopl(3);
> > + asm("cli");
>
> Do we need x86 specific items in application?
>
> > + }
> > +#endif
> > + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n",
> > getpid(), getppid());
> > + l2reflect_launch_one_lcore(NULL);
> > + } else {
> > + assert(rte_lcore_count() == 2);
> > + /* the worker reflects the packets */
> > + RTE_LCORE_FOREACH_WORKER(lcore_id)
> > + {
> > +
> > rte_eal_remote_launch(l2reflect_launch_one_lcore, NULL,
> > + lcore_id);
> > + }
> > +
> > + /* the primary prints the stats */
> > + init_record();
> > + l2reflect_stats_loop();
> > + rte_eal_mp_wait_lcore();
> > + }
> > + rte_eal_cleanup();
> > +
> > + if (l2reflect_hist)
> > + output_histogram_snapshot();
> > +
> > + cleanup_record();
> > +
> > + return 0;
> > +}
> > diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
> > new file mode 100644
> > index 0000000000..14b154ef06
> > --- /dev/null
> > +++ b/app/l2reflect/meson.build
> > @@ -0,0 +1,21 @@
> > +# SPDX-License-Identifier: BSD-3-Clause
> > +# Copyright(c) 2022 Siemens AG
> > +
> > +cc = meson.get_compiler('c')
> > +
> > +jansson = dependency('libjansson', required: false)
> > +
> > +if not jansson.found()
> > + jansson = cc.find_library('jansson', required: false)
> > +endif
> > +
> > +if cc.has_header('sys/io.h')
> > + cflags += '-DHAS_SYS_IO'
> > +endif
> > +
> > +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
> > +deps += ['ethdev']
> > +if jansson.found()
> > + ext_deps += jansson
> > + cflags += '-DRTE_HAS_JANSSON'
> > +endif
> > diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
> > new file mode 100644
> > index 0000000000..c1fae5d5e4
> > --- /dev/null
> > +++ b/app/l2reflect/payload.h
> > @@ -0,0 +1,26 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Siemens AG
> > + */
> > +#include <rte_ether.h>
> > +
> > +#ifndef _L2REFLECT_PAYLOAD_H_
> > +#define _L2REFLECT_PAYLOAD_H_
> > +
> > +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
> > +/* IEEE Std 802 - Local Experimental Ethertype */
> > +#define ETHER_TYPE_L2REFLECT 0x88B5
> > +
> > +struct my_magic_packet {
> > + /* l2 packet header */
> > + struct rte_ether_hdr eth;
> > + /* type of the l2reflect packet */
> > + uint8_t type;
> > + /* magic easy-to-spot pattern for tracing */
> > + uint64_t magic;
> > + /* break if latency is larger than this */
> > + uint64_t breakval;
> > + /* intended size of the packet */
> > + uint64_t req_pkt_bytes;
> > +};
> > +
> > +#endif /* _L2REFLECT_PAYLOAD_H_ */
On Wed, Sep 21, 2022 at 8:43 PM Henning Schild
<henning.schild@siemens.com> wrote:
>
> Am Wed, 21 Sep 2022 20:12:21 +0530
> schrieb Jerin Jacob <jerinjacobk@gmail.com>:
>
> > On Fri, Sep 2, 2022 at 2:16 PM Felix Moessbauer
> > <felix.moessbauer@siemens.com> wrote:
> > >
> > > The l2reflect application implements a ping-pong benchmark to
> > > measure the latency between two instances. For communication,
> > > we use raw ethernet and send one packet at a time. The timing data
> > > is collected locally and min/max/avg values are displayed in a TUI.
> > > Finally, a histogram of the latencies is printed which can be
> >
> > One highlevel comment,
> >
> > IMO, We don't need to add code for capturing in histogram and analyze
> > it in C code.
> > We can simply use, rte_trace and so that we can visualize with
> > standard trace viewers.
> > Also add add python based script parse the CTF emitted by trace and
> > find min/max/avg values to simply the code.
>
> We need some logic in the application to have a live view of
> min/max/avg. More importantly for the break threshold where we stop the
> measurement and the tracing ... so you find the root-cause of your
> spike at the end of your trace.
>
> And a trace is only so long until the ring wraps.
>
> So tracing could be the input, but the processing should happen live.
> To react on a max threshold, to show live data, to not miss anything
> even when running for weeks.
>
> Given that trace processing would need to be live, would you still
> suggest to use traces as input and use python to process them? I guess
> that could be done with another process and IPC but not sure it would
> be nice or how much code it would save.
Yes. I would suggest to take this path to accommodate more use case in
future like
- finding CPU idle time
-latency for crypto/dmadev/eventdev enqueue to dequeue
-histogram of occupancy for different queues
etc
This would translate to
1)Adding app/proc-info style app to pull the live trace from primary process
2)Add plugin framework to operate on live trace
3)Add a plugin for this specific use case
4)If needed, a communication from secondary to primary to take action
based on live analysis
like in this case if stop the primary when latency exceeds certain limit
On the plus side,
If we move all analysis and presentation to new generic application,
your packet forwarding
logic can simply move as new fwd_engine in testpmd(see
app/test-pmd/noisy_vnf.c as a example for fwdengine)
Ideally "eal: add lcore poll busyness telemetry"[1] could converge to
this model.
[1]
https://patches.dpdk.org/project/dpdk/patch/20220914092929.1159773-2-kevin.laatz@intel.com/
>
> Henning
>
> > > further processed with the jitterdebugger visualization scripts.
> > > To debug latency spikes, a max threshold can be defined.
> > > If it is hit, a trace point is created on both instances.
> > >
> > > Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> > > Signed-off-by: Henning Schild <henning.schild@siemens.com>
> > > ---
> > > app/l2reflect/colors.c | 34 ++
> > > app/l2reflect/colors.h | 19 +
> > > app/l2reflect/l2reflect.h | 53 ++
> > > app/l2reflect/main.c | 1007
> > > +++++++++++++++++++++++++++++++++++++ app/l2reflect/meson.build |
> > > 21 + app/l2reflect/payload.h | 26 +
> > > app/l2reflect/stats.c | 225 +++++++++
> > > app/l2reflect/stats.h | 67 +++
> > > app/l2reflect/utils.c | 67 +++
> > > app/l2reflect/utils.h | 20 +
> > > app/meson.build | 1 +
> >
> > Need to add a doc for this example at doc/guides/sample_app_ug/
> >
> > >
> > > diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
> > > new file mode 100644
> > > index 0000000000..af881d8788
> > > --- /dev/null
> > > +++ b/app/l2reflect/colors.c
> > > @@ -0,0 +1,34 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2020 Siemens AG
> >
> > 2022. Please fix in all files.
> >
> > > diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
> > > new file mode 100644
> > > index 0000000000..922bd7c281
> > > --- /dev/null
> > > +++ b/app/l2reflect/l2reflect.h
> > > @@ -0,0 +1,53 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2020 Siemens AG
> > > + */
> > > +
> > > +#include <stdatomic.h>
> > > +
> > > +#ifndef _L2REFLECT_L2REFLECT_H_
> > > +#define _L2REFLECT_L2REFLECT_H_
> > > +
> > > +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
> >
> > Don't define RTE_ symbols in application.
> >
> > > +
> > > +/* max size that common 1G NICs support */
> > > +#define MAX_JUMBO_PKT_LEN 9600
> > > +
> > > +/* Used to compare MAC addresses. */
> > > +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
> > > +
> > > +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
> > > +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
> >
> >
> > Use RTE_MAX and friends. Similar comments for other macros.
> >
> >
> > > +extern int l2reflect_hist;
> > > +extern unsigned int l2reflect_hist_buckets;
> > > +extern atomic_int l2reflect_output_hist;
> > > +extern int l2reflect_interrupt;
> > > +extern uint64_t l2reflect_sleep_msec;
> > > +extern uint64_t l2reflect_pkt_bytes;
> > > +extern uint16_t l2reflect_port_number;
> > > +extern atomic_int l2reflect_state;
> > > +extern struct rte_ether_addr l2reflect_port_eth_addr;
> > > +extern struct rte_ether_addr l2reflect_remote_eth_addr;
> >
> > It is better to move all global variables to app_ctx kind of
> > structure and allocate from hugepage as some of them are used in
> > fastpath and easy to track.
> >
> > > +/* Configurable number of RX/TX ring descriptors */
> > > +#define RTE_TEST_RX_DESC_DEFAULT 128
> > > +#define RTE_TEST_TX_DESC_DEFAULT 128
> >
> > Don't define RTE_ symbols in application.
> >
> > > +/* Send a burst of one packet */
> > > +static inline int
> > > +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
> > > +{
> > > + unsigned int ret;
> > > + struct rte_ether_hdr *eth;
> > > + struct my_magic_packet *pkt;
> > > + uint16_t type;
> > > +
> > > + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
> > > + pkt = (struct my_magic_packet *)eth;
> > > + type = pkt->type;
> > > +
> > > + if (likely(type == TRACE_TYPE_DATA))
> > > + clock_gettime(CLOCK_MONOTONIC, &last_sent);
> >
> > Use rte_rdtsc_precise() instead of system call in fastpath
> >
> > > + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
> > > + if (unlikely(ret < 1))
> > > + rte_pktmbuf_free(*m);
> > > + return 0;
> > > +}
> > > +
> > > +static inline void
> > > +l2reflect_simple_forward(struct rte_mbuf *m)
> > > +{
> > > + struct rte_ether_hdr *eth;
> > > + struct my_magic_packet *pkt;
> > > +
> > > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > > + pkt = (struct my_magic_packet *)eth;
> > > +
> > > + /* dst addr */
> > > + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
> > > + /* src addr */
> > > + rte_ether_addr_copy(&l2reflect_port_eth_addr,
> > > ð->src_addr); +
> > > + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
> > > + rte_eal_trace_generic_str("sending traced packet");
> > >
> >
> > Create app_trace tracepoint specific to application with new type to
> > avoid emit the string.
> >
> >
> > > +
> > > + l2reflect_send_packet(&m, l2reflect_port_number);
> > > +}
> >
> > > +
> > > +/*
> > > + * process a single packet.
> > > + * return false if latency threshold is hit
> > > + */
> > > +static inline int
> > > +process_packet(
> > > + struct my_magic_packet *pkt,
> > > + struct timespec *rx_time,
> > > + uint64_t *diff)
> > > +{
> > > + if (pkt->type == TRACE_TYPE_DATA) {
> > > + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
> > > + *diff = calcdiff_ns(last_recv, last_sent);
> > > + if (!unlikely(add_to_record(*diff))) {
> > > + /* TODO: improve tracing */
> > > +
> > > rte_eal_trace_generic_u64(record.max_round_ns / 1000);
> >
> > Add a new trace point as an application specific item.
> >
> > > + return 0;
> > > + }
> > > + }
> > > + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
> > > + rte_eal_trace_generic_str("received traced
> > > packet");
> >
> > Add a new trace point as an application specific item.
> >
> > > +
> > > + return 1;
> > > +}
> > > +
> > > +/*
> > > + * free all packet buffers in the range [begin, end[.
> > > + */
> > > +static void
> > > +free_pktbufs(
> > > + struct rte_mbuf **bufs,
> > > + int begin,
> > > + int end)
> > > +{
> > > + int i = begin;
> > > + for (; i < end; i++)
> > > + rte_pktmbuf_free(bufs[0]);
> >
> > Freeing the same packet multiple times?
> >
> > Use burst varint if possible.
> >
> >
> >
> > > +}
> > > +
> > > +/*
> > > + * return 1 in case the ball was lost (cheap check)
> > > + */
> > > +static inline void
> > > +check_ball_lost(const uint64_t dp_idle) {
> > > + /* only check if we are in running state and have a
> > > breakval */
> > > + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
> > > + l2reflect_state == S_RUNNING &&
> > > + l2reflect_break_usec &&
> > > + record.rounds > WARMUP_ROUNDS) {
> > > + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64
> > > " rounds\n", record.rounds);
> > > + l2reflect_state = S_LOCAL_TERM;
> > > + }
> > > +}
> > > +}
> > > +
> > > + /*
> > > + * in quiet mode the primary executes the main packet loop
> > > + * otherwise the one worker does it and the primary prints
> > > stats
> > > + */
> > > + if (quiet) {
> > > + assert(rte_lcore_count() == 1);
> > > +#ifdef HAS_SYS_IO
> > > + if (disable_int) {
> > > + iopl(3);
> > > + asm("cli");
> >
> > Do we need x86 specific items in application?
> >
> > > + }
> > > +#endif
> > > + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n",
> > > getpid(), getppid());
> > > + l2reflect_launch_one_lcore(NULL);
> > > + } else {
> > > + assert(rte_lcore_count() == 2);
> > > + /* the worker reflects the packets */
> > > + RTE_LCORE_FOREACH_WORKER(lcore_id)
> > > + {
> > > +
> > > rte_eal_remote_launch(l2reflect_launch_one_lcore, NULL,
> > > + lcore_id);
> > > + }
> > > +
> > > + /* the primary prints the stats */
> > > + init_record();
> > > + l2reflect_stats_loop();
> > > + rte_eal_mp_wait_lcore();
> > > + }
> > > + rte_eal_cleanup();
> > > +
> > > + if (l2reflect_hist)
> > > + output_histogram_snapshot();
> > > +
> > > + cleanup_record();
> > > +
> > > + return 0;
> > > +}
> > > diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
> > > new file mode 100644
> > > index 0000000000..14b154ef06
> > > --- /dev/null
> > > +++ b/app/l2reflect/meson.build
> > > @@ -0,0 +1,21 @@
> > > +# SPDX-License-Identifier: BSD-3-Clause
> > > +# Copyright(c) 2022 Siemens AG
> > > +
> > > +cc = meson.get_compiler('c')
> > > +
> > > +jansson = dependency('libjansson', required: false)
> > > +
> > > +if not jansson.found()
> > > + jansson = cc.find_library('jansson', required: false)
> > > +endif
> > > +
> > > +if cc.has_header('sys/io.h')
> > > + cflags += '-DHAS_SYS_IO'
> > > +endif
> > > +
> > > +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
> > > +deps += ['ethdev']
> > > +if jansson.found()
> > > + ext_deps += jansson
> > > + cflags += '-DRTE_HAS_JANSSON'
> > > +endif
> > > diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
> > > new file mode 100644
> > > index 0000000000..c1fae5d5e4
> > > --- /dev/null
> > > +++ b/app/l2reflect/payload.h
> > > @@ -0,0 +1,26 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2022 Siemens AG
> > > + */
> > > +#include <rte_ether.h>
> > > +
> > > +#ifndef _L2REFLECT_PAYLOAD_H_
> > > +#define _L2REFLECT_PAYLOAD_H_
> > > +
> > > +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
> > > +/* IEEE Std 802 - Local Experimental Ethertype */
> > > +#define ETHER_TYPE_L2REFLECT 0x88B5
> > > +
> > > +struct my_magic_packet {
> > > + /* l2 packet header */
> > > + struct rte_ether_hdr eth;
> > > + /* type of the l2reflect packet */
> > > + uint8_t type;
> > > + /* magic easy-to-spot pattern for tracing */
> > > + uint64_t magic;
> > > + /* break if latency is larger than this */
> > > + uint64_t breakval;
> > > + /* intended size of the packet */
> > > + uint64_t req_pkt_bytes;
> > > +};
> > > +
> > > +#endif /* _L2REFLECT_PAYLOAD_H_ */
>
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, September 20, 2022 10:02 PM
> To: Moessbauer, Felix (T CED SES-DE) <felix.moessbauer@siemens.com>;
> dev@dpdk.org
> Cc: Schild, Henning (T CED SES-DE) <henning.schild@siemens.com>; Kiszka, Jan
> (T CED) <jan.kiszka@siemens.com>; thomas@monjalon.net; Marcelo Tosatti
> <mtosatti@redhat.com>
> Subject: Re: [PATCH v6 2/2] Add l2reflect measurement application
>
> Hi Felix,
>
> First, I support the idea of having the l2reflect application part of
> the DPDK repository.
Hi Maxime,
Many thanks for the review.
I'm currently travelling but plan to address all responses regarding l2reflect in the upcoming week.
Best regards,
Felix
>
> Please note CI failed to build it on different platforms:
> https://eur01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fmails.dpd
> k.org%2Farchives%2Ftest-report%2F2022-
> September%2F304617.html&data=05%7C01%7Cfelix.moessbauer%40siem
> ens.com%7Cdd1323af309a42d0a12b08da9b10a598%7C38ae3bcd95794fd4adda
> b42e1495d55a%7C1%7C0%7C637992793049685048%7CUnknown%7CTWFpbGZ
> sb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0
> %3D%7C3000%7C%7C%7C&sdata=eM6SKiinN6Jc%2Fb1lAYpY2TX1x7hxMzA
> 3iGetLzRzL0g%3D&reserved=0
>
> It also fails to build on my Fc35 machine:
> [3237/3537] Compiling C object app/dpdk-l2reflect.p/l2reflect_main.c.o
> ../app/l2reflect/main.c: In function 'l2reflect_main_loop':
> ../app/l2reflect/main.c:560:19: warning: array subscript 'uint64_t {aka
> long unsigned int}[0]' is partly outside array bounds of 'struct
> rte_ether_addr[1]' [-Warray-bounds]
> 560 | i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) &
> MAC_ADDR_CMP) >
> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> ../app/l2reflect/main.c:110:23: note: while referencing
> 'l2reflect_port_eth_addr'
> 110 | struct rte_ether_addr l2reflect_port_eth_addr;
> | ^~~~~~~~~~~~~~~~~~~~~~~
> ../app/l2reflect/main.c:561:27: warning: array subscript 'uint64_t {aka
> long unsigned int}[0]' is partly outside array bounds of 'struct
> rte_ether_addr[1]' [-Warray-bounds]
> 561 | (*((uint64_t
> *)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));
> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> ../app/l2reflect/main.c:111:23: note: while referencing
> 'l2reflect_remote_eth_addr'
> 111 | struct rte_ether_addr l2reflect_remote_eth_addr;
> | ^~~~~~~~~~~~~~~~~
>
> Some more comments inline:
>
> On 9/2/22 10:45, Felix Moessbauer wrote:
> > The l2reflect application implements a ping-pong benchmark to
> > measure the latency between two instances. For communication,
> > we use raw ethernet and send one packet at a time. The timing data
> > is collected locally and min/max/avg values are displayed in a TUI.
> > Finally, a histogram of the latencies is printed which can be
> > further processed with the jitterdebugger visualization scripts.
> > To debug latency spikes, a max threshold can be defined.
> > If it is hit, a trace point is created on both instances.
> >
> > Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> > Signed-off-by: Henning Schild <henning.schild@siemens.com>
> > ---
> > app/l2reflect/colors.c | 34 ++
> > app/l2reflect/colors.h | 19 +
> > app/l2reflect/l2reflect.h | 53 ++
> > app/l2reflect/main.c | 1007 +++++++++++++++++++++++++++++++++++++
> > app/l2reflect/meson.build | 21 +
> > app/l2reflect/payload.h | 26 +
> > app/l2reflect/stats.c | 225 +++++++++
> > app/l2reflect/stats.h | 67 +++
> > app/l2reflect/utils.c | 67 +++
> > app/l2reflect/utils.h | 20 +
> > app/meson.build | 1 +
> > 11 files changed, 1540 insertions(+)
> > create mode 100644 app/l2reflect/colors.c
> > create mode 100644 app/l2reflect/colors.h
> > create mode 100644 app/l2reflect/l2reflect.h
> > create mode 100644 app/l2reflect/main.c
> > create mode 100644 app/l2reflect/meson.build
> > create mode 100644 app/l2reflect/payload.h
> > create mode 100644 app/l2reflect/stats.c
> > create mode 100644 app/l2reflect/stats.h
> > create mode 100644 app/l2reflect/utils.c
> > create mode 100644 app/l2reflect/utils.h
>
> If we agree to have this application in app/ directory,
> I think you'll have to add documentation for this new tool in
> doc/guides/tools/.
>
> > diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
> > new file mode 100644
> > index 0000000000..af881d8788
> > --- /dev/null
> > +++ b/app/l2reflect/colors.c
> > @@ -0,0 +1,34 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +
> > +#include "colors.h"
> > +
> > +const struct color_palette *colors;
> > +
> > +static const struct color_palette color_palette_default = {
> > + .red = "\x1b[01;31m",
> > + .green = "\x1b[01;32m",
> > + .yellow = "\x1b[01;33m",
> > + .blue = "\x1b[01;34m",
> > + .magenta = "\x1b[01;35m",
> > + .cyan = "\x1b[01;36m",
> > + .reset = "\x1b[0m"
> > +};
> > +
> > +static const struct color_palette color_palette_bw = { .red = "",
> > + .green = "",
> > + .yellow = "",
> > + .blue = "",
> > + .magenta = "",
> > + .cyan = "",
> > + .reset = "" };
> > +
> > +void
> > +enable_colors(int enable)
> > +{
> > + if (enable)
> > + colors = &color_palette_default;
> > + else
> > + colors = &color_palette_bw;
> > +}
> > diff --git a/app/l2reflect/colors.h b/app/l2reflect/colors.h
> > new file mode 100644
> > index 0000000000..346547138b
> > --- /dev/null
> > +++ b/app/l2reflect/colors.h
> > @@ -0,0 +1,19 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +#ifndef _L2REFLECT_COLORS_H_
> > +#define _L2REFLECT_COLORS_H_
> > +
> > +/* posix terminal colors */
> > +struct color_palette {
> > + const char *red, *green, *yellow, *blue, *magenta, *cyan, *reset;
> > +};
> > +
> > +/* ptr to the current tui color palette */
> > +extern const struct color_palette *colors;
> > +
> > +/* disable colored output */
> > +void
> > +enable_colors(int enable);
> > +
> > +#endif /* _L2REFLECT_COLORS_H_ */
> > diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
> > new file mode 100644
> > index 0000000000..922bd7c281
> > --- /dev/null
> > +++ b/app/l2reflect/l2reflect.h
> > @@ -0,0 +1,53 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +
> > +#include <stdatomic.h>
> > +
> > +#ifndef _L2REFLECT_L2REFLECT_H_
> > +#define _L2REFLECT_L2REFLECT_H_
> > +
> > +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
> > +
> > +/* max size that common 1G NICs support */
> > +#define MAX_JUMBO_PKT_LEN 9600
> > +
> > +/* Used to compare MAC addresses. */
> > +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
> > +
> > +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
> > +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
> > +
> > +enum {
> > + TRACE_TYPE_DATA,
> > + TRACE_TYPE_HELO,
> > + TRACE_TYPE_EHLO,
> > + TRACE_TYPE_RSET,
> > + TRACE_TYPE_QUIT,
> > +};
> > +
> > +enum STATE {
> > + /* elect the initial sender */
> > + S_ELECT_LEADER = 1,
> > + /* reset the counters */
> > + S_RESET_TRX = 2,
> > + /* measurement S_RUNNING */
> > + S_RUNNING = 4,
> > + /* terminated by local event */
> > + S_LOCAL_TERM = 8,
> > + /* terminated by remote event */
> > + S_REMOTE_TERM = 16
> > +};
> > +
> > +extern int l2reflect_hist;
> > +extern unsigned int l2reflect_hist_buckets;
> > +extern atomic_int l2reflect_output_hist;
> > +extern int l2reflect_interrupt;
> > +extern uint64_t l2reflect_sleep_msec;
> > +extern uint64_t l2reflect_pkt_bytes;
> > +extern uint16_t l2reflect_port_number;
> > +extern atomic_int l2reflect_state;
> > +extern struct rte_ether_addr l2reflect_port_eth_addr;
> > +extern struct rte_ether_addr l2reflect_remote_eth_addr;
> > +
> > +#endif /* _L2REFLECT_L2REFLECT_H_ */
> > diff --git a/app/l2reflect/main.c b/app/l2reflect/main.c
> > new file mode 100644
> > index 0000000000..33a87e8fad
> > --- /dev/null
> > +++ b/app/l2reflect/main.c
> > @@ -0,0 +1,1007 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + *
> > + * The l2reflect application implements a ping-pong benchmark to
> > + * measure the latency between two instances. For communication,
> > + * we use raw ethernet and send one packet at a time. The timing data
> > + * is collected locally and min/max/avg values are displayed in a TUI.
> > + * Finally, a histogram of the latencies is printed which can be
> > + * further processed with the jitterdebugger visualization scripts.
> > + * To debug latency spikes, a max threshold can be defined.
> > + * If it is hit, a trace point is created on both instances.
> > + *
> > + * Examples:
> > + * launch (non-rt kernel): l2reflect --lcores 0@0,1@6 -n 1
> > + * launch (rt kernel): l2reflect --lcores 0@0,1@6 -n 1 -- -P 50 -r -l
> > + *
> > + * For histogram data, launch with -H <usec> -F <output file>, e.g.
> > + * -H 10 -F histogram.json for a histogram with 10 usec buckets which
> > + * is written to a histogram.json file. This file can then be visualized
> > + * using the jitterdebugger plotting scripts:
> > + * jitterplot hist histogram.json
> > + *
> > + * While the application is running, it can be controlled by sending
> > + * signals to one of the processes:
> > + * - SIGUSR1: reset the min/max/avg on both instances
> > + * - SIGUSR2: output / write the current histogram
> > + * - SIGHUP/SIGINT: gracefully terminate both instances
> > + *
> > + * Note on wiring:
> > + * The l2reflect application sends the packets via a physical ethernet
> > + * interface. When running both instances on a single system, at least
> > + * two dedicated physical ports and a (physical) loopback between them
> > + * is required.
>
> Above text could be used as a basis for the documentation.
>
> > + */
> > +
> > +#include <stdio.h>
> > +#include <errno.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <stdint.h>
> > +#include <time.h>
> > +#include <inttypes.h>
> > +#include <getopt.h>
> > +#include <sys/signal.h>
> > +#include <assert.h>
> > +#include <unistd.h>
> > +#ifdef HAS_SYS_IO
> > +#include <sys/io.h>
> > +#endif
> > +#include <sched.h>
> > +#include <sys/mman.h>
> > +#include <stdatomic.h>
> > +
> > +#include <rte_common.h>
> > +#include <rte_errno.h>
> > +#include <rte_log.h>
> > +#include <rte_memory.h>
> > +#include <rte_memcpy.h>
> > +#include <rte_memzone.h>
> > +#include <rte_eal.h>
> > +#include <rte_eal_trace.h>
> > +#include <rte_per_lcore.h>
> > +#include <rte_launch.h>
> > +#include <rte_atomic.h>
> > +#include <rte_cycles.h>
> > +#include <rte_prefetch.h>
> > +#include <rte_lcore.h>
> > +#include <rte_per_lcore.h>
> > +#include <rte_branch_prediction.h>
> > +#include <rte_interrupts.h>
> > +#include <rte_random.h>
> > +#include <rte_debug.h>
> > +#include <rte_ether.h>
> > +#include <rte_ethdev.h>
> > +#include <rte_ring.h>
> > +#include <rte_mempool.h>
> > +#include <rte_mbuf.h>
> > +
> > +#include "l2reflect.h"
> > +#include "payload.h"
> > +#include "utils.h"
> > +#include "colors.h"
> > +#include "stats.h"
> > +
> > +#define NSEC_PER_SEC 1000000000
> > +
> > +#define NB_MBUF 2047
> > +
> > +#define MAX_PKT_BURST 32
> > +/* warmup a few round before starting the measurement */
> > +#define WARMUP_ROUNDS 42
>
> Would it make sense to have this as default value and provide the user
> with the possibility to configure it via command line?
>
> > +
> > +/* break after one second */
> > +#define DEFAULT_BREAKVAL_USEC 1000000ull
> > +/* break if no rx for more than this rounds */
> > +#define RX_TIMEOUT_MASK ~0xFFFFFull
> > +
> > +/* delay between two election packets */
> > +#define DELAY_ELECTION_MS 500
> > +
> > +int l2reflect_hist;
> > +unsigned int l2reflect_hist_buckets = HIST_NUM_BUCKETS_DEFAULT;
> > +atomic_int l2reflect_output_hist;
> > +int l2reflect_fake_mac;
> > +int l2reflect_interrupt;
> > +uint64_t l2reflect_sleep_msec;
> > +uint64_t l2reflect_pkt_bytes = 64;
> > +uint16_t l2reflect_port_number;
> > +atomic_int l2reflect_state;
> > +struct rte_ether_addr l2reflect_port_eth_addr;
> > +struct rte_ether_addr l2reflect_remote_eth_addr;
> > +
> > +static struct timespec last_sent, last_recv;
> > +static int quiet, disable_int, priority, policy, l2reflect_mlock;
> > +
> > +static atomic_int sleep_start;
> > +static uint64_t rounds;
> > +
> > +/* Configurable number of RX/TX ring descriptors */
> > +#define RTE_TEST_RX_DESC_DEFAULT 128
> > +#define RTE_TEST_TX_DESC_DEFAULT 128
> > +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
> > +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
> > +
> > +static struct rte_eth_conf port_conf = {
> > + .rxmode = {
> > + .split_hdr_size = 0,
> > + },
> > + .txmode = {
> > + .mq_mode = RTE_ETH_MQ_TX_NONE,
> > + },
> > +};
> > +
> > +static uint32_t l2reflect_q;
> > +static uint64_t l2reflect_break_usec = DEFAULT_BREAKVAL_USEC;
> > +
> > +static struct rte_ether_addr ether_bcast_addr = {
> > + .addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
> > +};
> > +
> > +struct rte_mempool *l2reflect_pktmbuf_pool;
> > +
> > +static void
> > +l2reflect_usage(const char *prgname)
> > +{
> > + printf("%s [EAL options] -- [-p PORT] -P [PRIO] [-b USEC] [-n SIZE] [-r] [-
> f] [-l]"
> > + "[-q] [-d] [-H USEC] [-B NUM] [-F FILE] [-S] [-i MSEC] [-m] [-c] [-
> h]\n"
> > + " -p PORT: port to configure\n"
> > + " -P PRIO: scheduling priority to use\n"
> > + " -b USEC: break when latency > USEC\n"
> > + " -n SIZE: size of packet in bytes [%i,%i]\n"
> > + " (when using jumbo frames, sender and receiver values have
> to match)\n"
> > + " -r: scheduling policy SCHED_RR\n"
> > + " -f: scheduling policy SCHED_FIFO\n"
> > + " -l: lock memory (mlockall)\n"
> > + " -q: quiet, do not print stats\n"
> > +#ifdef HAS_SYS_IO
> > + " -d: ignore maskable interrupts\n"
> > +#endif
> > + " -H USEC: create histogram of latencies with USEC time slices\n"
> > + " -B NUM: number of histogram buckets\n"
> > + " -F FILE: write histogram to file\n"
> > + " -S: start processing threads in sleep, wake with SIGCONT\n"
> > + " -i MSEC: use interrupts instead of polling (cont. on interrupt or
> after MSEC)\n"
> > + " -m: fake the source mac addr by adding 1 to the last tuple\n"
> > + " -c: disable colored output\n"
> > + " -h: display help message\n",
> > + prgname, RTE_ETHER_MIN_LEN, MAX_JUMBO_PKT_LEN);
> > +}
> > +
> > +static int
> > +check_opts_for_help(int argc, char **argv, void(*display_help)(const char *))
> > +{
> > + if (argc > 2 && !strncmp(argv[1], "--", 3)) {
> > + if (!strncmp(argv[2], "-h", 3) || !strncmp(argv[2], "--help", 7)) {
> > + display_help(argv[0]);
> > + return 1;
> > + }
> > + }
> > + return 0;
> > +}
> > +
> > +/* Parse the argument given in the command line of the application */
> > +static int
> > +l2reflect_parse_args(int argc, char **argv)
> > +{
> > + int opt, ret;
> > + char **argvopt;
> > + int option_index;
> > + int opt_colors = 1;
> > + char *prgname = argv[0];
> > + static struct option lgopts[] = { { NULL, 0, 0, 0 } };
> > +
> > + argvopt = argv;
> > + policy = SCHED_OTHER;
> > + hist_filename = NULL;
> > + l2reflect_output_hist = 0;
> > +
> > + while ((opt = getopt_long(argc, argvopt, "p:P:b:H:B:F:i:n:qdrflScm",
> lgopts,
> > + &option_index)) != EOF) {
> > + switch (opt) {
> > + /* port */
> > + case 'p':
> > + l2reflect_port_number =
> > + (uint16_t)strtoul(optarg, NULL, 10);
> > + break;
> > + case 'P':
> > + priority = strtoul(optarg, NULL, 10);
> > + if (priority > 0) {
> > + if (policy == SCHED_OTHER)
> > + policy = SCHED_RR;
> > + l2reflect_mlock = 1;
> > + }
> > + break;
> > + case 'b':
> > + l2reflect_break_usec = strtoul(optarg, NULL, 10);
> > + break;
> > + case 'S':
> > + sleep_start = 1;
> > + break;
> > + case 'q':
> > + quiet = 1;
> > + break;
> > + case 'd':
> > + disable_int = 1;
> > + break;
> > + case 'r':
> > + policy = SCHED_RR;
> > + break;
> > + case 'f':
> > + policy = SCHED_FIFO;
> > + break;
> > + case 'l':
> > + l2reflect_mlock = 1;
> > + break;
> > + case 'H':
> > + l2reflect_hist = 1;
> > + hist_bucket_usec = strtoul(optarg, NULL, 10);
> > +#ifndef RTE_HAS_JANSSON
> > + printf("not compiled with cjson support\n");
>
> s/cjson/jansson/
>
> > + return -1;
> > +#endif
> > + break;
> > + case 'B':
> > + l2reflect_hist_buckets = strtoul(optarg, NULL, 10);
> > + break;
> > + case 'F':
> > + hist_filename = strndup(optarg, 128);
> > + break;
> > + case 'i':
> > + l2reflect_interrupt = 1;
> > + l2reflect_sleep_msec = strtoul(optarg, NULL, 10);
> > + break;
> > + case 'n':
> > + l2reflect_pkt_bytes = strtoull(optarg, NULL, 10);
> > + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
> > + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN) {
> > + printf("packet size %" PRIu64 " not valid\n",
> l2reflect_pkt_bytes);
> > + return -1;
> > + }
> > + if (l2reflect_pkt_bytes >
> RTE_MBUF_DEFAULT_DATAROOM) {
> > + printf("NOT IMPLEMENTED. Packet size %"
> PRIu64 " requires segmented buffers.\n",
> > + l2reflect_pkt_bytes);
> > + return -1;
> > + }
> > + break;
> > + case 'c':
> > + opt_colors = 0;
> > + break;
> > + case 'm':
> > + l2reflect_fake_mac = 1;
> > + break;
> > + default:
> > + l2reflect_usage(prgname);
> > + return -1;
> > + }
> > + }
> > +
> > + if (optind >= 0)
> > + argv[optind - 1] = prgname;
> > +
> > + if (hist_filename && !l2reflect_hist) {
> > + printf("-F switch requires -H switch as well\n");
> > + return -1;
> > + }
> > +
> > + /* output is redirected, disable coloring */
> > + if (!isatty(fileno(stdout)))
> > + opt_colors = 0;
> > +
> > + enable_colors(opt_colors);
> > +
> > + ret = optind - 1;
> > + optind = 0; /* reset getopt lib */
> > + return ret;
> > +}
> > +
> > +/* Send a burst of one packet */
> > +static inline int
> > +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
> > +{
> > + unsigned int ret;
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > + uint16_t type;
> > +
> > + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
> > + pkt = (struct my_magic_packet *)eth;
> > + type = pkt->type;
> > +
> > + if (likely(type == TRACE_TYPE_DATA))
> > + clock_gettime(CLOCK_MONOTONIC, &last_sent);
> > + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
> > + if (unlikely(ret < 1))
> > + rte_pktmbuf_free(*m);
> > + return 0;
> > +}
> > +
> > +static inline void
> > +l2reflect_simple_forward(struct rte_mbuf *m)
> > +{
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > +
> > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > + pkt = (struct my_magic_packet *)eth;
> > +
> > + /* dst addr */
> > + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
> > + /* src addr */
> > + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
> > +
> > + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
> > + rte_eal_trace_generic_str("sending traced packet");
> > +
> > + l2reflect_send_packet(&m, l2reflect_port_number);
> > +}
> > +
> > +static struct rte_mbuf *
> > +l2reflect_new_pkt(unsigned int type)
> > +{
> > + struct rte_mbuf *m;
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > + uint64_t frame_bytes = RTE_ETHER_MIN_LEN;
> > +
> > + m = rte_pktmbuf_alloc(l2reflect_pktmbuf_pool);
> > + if (m == NULL)
> > + rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc failed\n");
> > +
> > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > +
> > + if (type == TRACE_TYPE_DATA)
> > + frame_bytes = l2reflect_pkt_bytes;
> > +
> > + /* zero out packet to make dumps better readable */
> > + memset(eth, 0, frame_bytes - RTE_ETHER_CRC_LEN);
> > +
> > + if (type == TRACE_TYPE_HELO)
> > + rte_ether_addr_copy(ðer_bcast_addr, ð->dst_addr);
> > + else
> > + rte_ether_addr_copy(&l2reflect_remote_eth_addr, ð-
> >dst_addr);
> > +
> > + /* src addr */
> > + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
> > + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT);
> > +
> > + m->data_len = frame_bytes - RTE_ETHER_CRC_LEN;
> > + m->pkt_len = frame_bytes - RTE_ETHER_CRC_LEN;
> > +
> > + pkt = (struct my_magic_packet *)eth;
> > + pkt->type = type;
> > + pkt->breakval = l2reflect_break_usec;
> > + pkt->req_pkt_bytes = l2reflect_pkt_bytes;
> > +
> > + return m;
> > +}
> > +
> > +static void
> > +l2reflect_send_reset(void)
> > +{
> > + struct rte_mbuf *m;
> > + m = l2reflect_new_pkt(TRACE_TYPE_RSET);
> > + l2reflect_send_packet(&m, l2reflect_port_number);
> > +}
> > +
> > +static void
> > +l2reflect_send_quit(void)
> > +{
> > + struct rte_mbuf *m;
> > + m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
> > + l2reflect_send_packet(&m, l2reflect_port_number);
> > +}
> > +
> > +static void
> > +l2reflect_new_ball(void)
> > +{
> > + struct rte_mbuf *pnewball;
> > + struct rte_ether_hdr *eth;
> > + struct my_magic_packet *pkt;
> > + char mac_src_str[RTE_ETHER_ADDR_FMT_SIZE];
> > + char mac_dst_str[RTE_ETHER_ADDR_FMT_SIZE];
> > +
> > + RTE_LOG(INFO, L2REFLECT, "Should create a packet to play with ...\n");
> > + pnewball = l2reflect_new_pkt(TRACE_TYPE_DATA);
> > +
> > + eth = rte_pktmbuf_mtod(pnewball, struct rte_ether_hdr *);
> > +
> > + rte_ether_format_addr(mac_src_str, sizeof(mac_src_str),
> &l2reflect_port_eth_addr);
> > + rte_ether_format_addr(mac_dst_str, sizeof(mac_dst_str),
> &l2reflect_remote_eth_addr);
> > + RTE_LOG(INFO, L2REFLECT, "from MAC address: %s to %s\n\n",
> mac_src_str, mac_dst_str);
> > +
> > + pkt = (struct my_magic_packet *)eth;
> > +
> > + /* we are tracing lets tell the others */
> > + if (l2reflect_break_usec)
> > + pkt->magic = MAGIC_TRACE_PAYLOAD;
> > +
> > + l2reflect_send_packet(&pnewball, l2reflect_port_number);
> > +}
> > +
> > +static inline int64_t
> > +calcdiff_ns(struct timespec t1, struct timespec t2)
> > +{
> > + int64_t diff;
> > + diff = NSEC_PER_SEC * (int64_t)((int)t1.tv_sec - (int)t2.tv_sec);
> > + diff += ((int)t1.tv_nsec - (int)t2.tv_nsec);
> > + return diff;
> > +}
> > +
> > +/* filter the received packets for actual l2reflect messages */
> > +static inline unsigned int
> > +l2reflect_rx_filter(
> > + struct rte_mbuf *buf)
> > +{
> > + struct rte_ether_hdr *eth;
> > + eth = rte_pktmbuf_mtod(buf, struct rte_ether_hdr *);
> > +
> > + if (unlikely(buf->nb_segs > 1))
> > + RTE_LOG(WARNING, L2REFLECT, "Segmented packet: data-len:
> %i, pkt-len: %i, #seg: %i\n",
> > + buf->data_len, buf->pkt_len, buf->nb_segs);
> > +
> > + /* check for the l2reflect ether type */
> > + if (unlikely(eth->ether_type !=
> rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT)))
> > + return 0;
> > +
> > + /*
> > + * if the packet is not from our partner
> > + * (and we already have a partner), drop it
> > + */
> > + if (unlikely(l2reflect_state != S_ELECT_LEADER &&
> > + !rte_is_same_ether_addr(ð->src_addr,
> &l2reflect_remote_eth_addr)))
> > + return 0;
> > +
> > + /* filter bounce-back packets */
> > + if (unlikely(rte_is_same_ether_addr(ð->src_addr,
> &l2reflect_port_eth_addr)))
> > + return 0;
> > +
> > + return 1;
> > +}
> > +
> > +/*
> > + * automatically elect the leader of the benchmark by
> > + * sending out HELO packets and waiting for responses.
> > + * On response, the mac addresses are compared and the
> > + * numerically larger one becomes the leader.
> > + */
> > +static int
> > +elect_leader(uint16_t portid)
> > +{
> > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> > + struct rte_mbuf *m;
> > + struct rte_ether_hdr *eth;
> > + struct rte_ether_addr *src_addr;
> > + struct rte_eth_dev_info dev_info;
> > + struct my_magic_packet *pkt;
> > + unsigned int i, nb_rx;
> > + int ehlo_send = 0;
> > + int i_win;
> > +
> > + while (l2reflect_state == S_ELECT_LEADER) {
> > + /* send a packet to make sure the MAC addr of this interface is
> publicly known */
> > + m = l2reflect_new_pkt(TRACE_TYPE_HELO);
> > + RTE_LOG(INFO, L2REFLECT, "looking for player HELO\n");
> > + l2reflect_send_packet(&m, l2reflect_port_number);
> > + rte_delay_ms(DELAY_ELECTION_MS);
> > +
> > + /* receive election packets */
> > + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
> > + MAX_PKT_BURST);
> > +
> > + /* note: do not short-circuit as otherwise the mbufs are not
> freed */
> > + for (i = 0; i < nb_rx; i++) {
> > + m = pkts_burst[i];
> > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > + src_addr = ð->src_addr;
> > + pkt = (struct my_magic_packet *)eth;
> > +
> > + if (!l2reflect_rx_filter(m)) {
> > + rte_pktmbuf_free(m);
> > + continue;
> > + }
> > +
> > + if (pkt->type == TRACE_TYPE_EHLO && l2reflect_state
> == S_ELECT_LEADER) {
> > + /* check if both modes are equal */
> > + if (((l2reflect_pkt_bytes <= RTE_ETHER_MTU)
> > + != (pkt->req_pkt_bytes <=
> RTE_ETHER_MTU))) {
> > + l2reflect_state = S_LOCAL_TERM;
> > + m =
> l2reflect_new_pkt(TRACE_TYPE_QUIT);
> > + l2reflect_send_packet(&m,
> l2reflect_port_number);
> > + rte_exit(EXIT_FAILURE,
> > + "remote and local jumbo
> config does not match "
> > + "(%" PRIu64 " vs %" PRIu64
> ")\n",
> > + l2reflect_pkt_bytes, pkt-
> >req_pkt_bytes);
> > + }
> > + if (l2reflect_pkt_bytes != pkt->req_pkt_bytes) {
> > + l2reflect_pkt_bytes =
> MAX(l2reflect_pkt_bytes,
> > + pkt-
> >req_pkt_bytes);
> > +
> rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
> > + const uint32_t overhead_len =
> eth_dev_get_overhead_len(
> > +
> dev_info.max_rx_pktlen,
> > +
> dev_info.max_mtu);
> > + const uint16_t mtu =
> MAX(l2reflect_pkt_bytes - overhead_len,
> > +
> dev_info.min_mtu);
> > + RTE_LOG(INFO, L2REFLECT,
> > + "update frame sizes: frame: %"
> PRIu64 ", MTU %d\n",
> > + l2reflect_pkt_bytes, mtu);
> > + const int ret = rte_eth_dev_set_mtu(
> > +
> l2reflect_port_number,
> > + mtu);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE, "failed
> to update MTU: %s\n",
> > + strerror(-ret));
> > + }
> > +
> > + if (ehlo_send) {
> > + l2reflect_state = S_RUNNING;
> > + RTE_LOG(INFO, L2REFLECT, "Enter
> running state\n");
> > + }
> > + }
> > + /* we got a HELO packet, respond with EHLO */
> > + if (pkt->type == TRACE_TYPE_HELO) {
> > + char
> mac_str_other[RTE_ETHER_ADDR_FMT_SIZE];
> > + rte_ether_addr_copy(src_addr,
> &l2reflect_remote_eth_addr);
> > + m = l2reflect_new_pkt(TRACE_TYPE_EHLO);
> > + rte_ether_format_addr(
> > + mac_str_other, sizeof(mac_str_other),
> &l2reflect_remote_eth_addr);
> > + RTE_LOG(INFO, L2REFLECT, "found one HELO
> from %s\n", mac_str_other);
> > + l2reflect_send_packet(&m,
> l2reflect_port_number);
> > + ehlo_send = 1;
> > + }
> > + rte_pktmbuf_free(m);
>
> The loop content could be moved in a dedicated function, it would
> improve the readability.
>
> > + }
> > + }
> > +
> > + if (rte_is_same_ether_addr(&l2reflect_port_eth_addr,
> &l2reflect_remote_eth_addr))
> > + rte_exit(EXIT_FAILURE, "talking to myself ... confused\n");
> > +
> > + /* the one with the bigger MAC is the leader */
> > + i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) & MAC_ADDR_CMP)
> >
> > + (*((uint64_t *)&l2reflect_remote_eth_addr) &
> MAC_ADDR_CMP));
> > +
> > + RTE_LOG(INFO, L2REFLECT, "i am the \"%s\"\n", i_win ? "rick" :
> "morty");
> > +
> > + return i_win;
> > +}
> > +
> > +/*
> > + * add the measured time diff to the statistics.
> > + * return false if threshold is hit
> > + */
> > +static inline int
> > +add_to_record(const uint64_t diff)
> > +{
> > + record.rounds++;
> > + /* do not count the first rounds, diff would be too high */
> > + if (record.rounds < WARMUP_ROUNDS)
> > + return 1;
> > +
> > + if (l2reflect_hist) {
> > + const uint64_t bucket =
> > + MIN(diff / (hist_bucket_usec * 1000),
> l2reflect_hist_buckets-1);
> > + record.hist[bucket]++;
> > + }
> > +
> > + record.avg_round_ns += (double)diff;
> > + if (diff < record.min_round_ns)
> > + record.min_round_ns = diff;
> > + if (diff > record.max_round_ns) {
> > + record.max_round_ns = diff;
> > + if (l2reflect_break_usec &&
> > + (record.max_round_ns > (l2reflect_break_usec * 1000)))
> > + return 0;
> > + }
> > + return 1;
> > +}
> > +
> > +/*
> > + * process a single packet.
> > + * return false if latency threshold is hit
> > + */
> > +static inline int
> > +process_packet(
> > + struct my_magic_packet *pkt,
> > + struct timespec *rx_time,
> > + uint64_t *diff)
> > +{
> > + if (pkt->type == TRACE_TYPE_DATA) {
> > + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
> > + *diff = calcdiff_ns(last_recv, last_sent);
> > + if (!unlikely(add_to_record(*diff))) {
> > + /* TODO: improve tracing */
> > + rte_eal_trace_generic_u64(record.max_round_ns /
> 1000);
> > + return 0;
> > + }
> > + }
> > + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
> > + rte_eal_trace_generic_str("received traced packet");
> > +
> > + return 1;
> > +}
> > +
> > +/*
> > + * free all packet buffers in the range [begin, end[.
> > + */
> > +static void
> > +free_pktbufs(
> > + struct rte_mbuf **bufs,
> > + int begin,
> > + int end)
> > +{
> > + int i = begin;
> > + for (; i < end; i++)
> > + rte_pktmbuf_free(bufs[0]);
>
> rte_pktmbuf_free(bufs[i]) ?
>
> > +}
> > +
> > +/*
> > + * return 1 in case the ball was lost (cheap check)
> > + */
> > +static inline void
> > +check_ball_lost(const uint64_t dp_idle) {
> > + /* only check if we are in running state and have a breakval */
> > + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
> > + l2reflect_state == S_RUNNING &&
> > + l2reflect_break_usec &&
> > + record.rounds > WARMUP_ROUNDS) {
> > + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64 "
> rounds\n", record.rounds);
> > + l2reflect_state = S_LOCAL_TERM;
> > + }
> > +}
> > +
> > +/* main processing loop */
> > +static void
> > +l2reflect_main_loop(void)
> > +{
> > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> > + struct rte_mbuf *m;
> > + unsigned int lcore_id;
> > + unsigned int j, nb_rx, nb_evt;
> > + uint16_t portid;
> > + /* number of consequent idle passes */
> > + uint64_t dp_idle = 0;
> > + uint64_t diff = 0;
> > + int sender;
> > + struct my_magic_packet *pkt;
> > + struct rte_ether_hdr *eth;
> > + struct rte_epoll_event event;
> > +
> > + lcore_id = rte_lcore_id();
> > +
> > + RTE_LOG(INFO, L2REFLECT, "entering main loop on lcore %u\n",
> lcore_id);
> > +
> > + portid = l2reflect_port_number;
> > + RTE_LOG(INFO, L2REFLECT, " -- lcoreid=%u portid=%u\n", lcore_id,
> > + portid);
> > + assert_link_status(portid);
> > +
> > +restart:
> > + init_record();
> > + rte_eth_stats_reset(portid);
> > + l2reflect_state = S_ELECT_LEADER;
> > + sender = elect_leader(portid);
> > +
> > + if (l2reflect_break_usec)
> > + rte_eal_trace_generic_str("hit breakval");
> > +
> > + /* the leader election implements a latch (half-barrier).
> > + * To ensure that the other party is in running state, we
> > + * have to wait at least a full election period
> > + */
> > + rte_delay_ms(DELAY_ELECTION_MS * 2);
> > +
> > + /* we are the sender so we bring one ball into the game */
> > + if (sender)
> > + l2reflect_new_ball();
> > +
> > + /* reset the record */
> > + init_record();
> > + while (l2reflect_state == S_RUNNING) {
> > + struct timespec rx_time;
> > +
> > + if (l2reflect_interrupt) {
> > + rte_eth_dev_rx_intr_enable(portid, l2reflect_q);
> > + /* wait for interrupt or timeout */
> > + nb_evt = rte_epoll_wait(RTE_EPOLL_PER_THREAD,
> &event, 1,
> > + l2reflect_sleep_msec);
> > + rte_eth_dev_rx_intr_disable(portid, l2reflect_q);
> > + if (nb_evt == 0 && rounds > WARMUP_ROUNDS)
> > + ++record.timeouts;
> > + }
> > +
> > + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
> > + MAX_PKT_BURST);
> > +
> > + if (nb_rx) {
> > + clock_gettime(CLOCK_MONOTONIC, &rx_time);
> > + dp_idle = 0;
> > + } else
> > + ++dp_idle;
> > +
> > + for (j = 0; j < nb_rx; j++) {
> > + m = pkts_burst[j];
> > + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
> > + pkt = (struct my_magic_packet *)eth;
> > +
> > + rte_prefetch0(eth);
> > +
> > + if (unlikely(!l2reflect_rx_filter(m))) {
> > + rte_pktmbuf_free(m);
> > + continue;
> > + }
> > +
> > + /* remote is telling us to reset or stop */
> > + if (unlikely(pkt->type == TRACE_TYPE_RSET)) {
> > + free_pktbufs(pkts_burst, j, nb_rx);
> > + goto restart;
> > + }
> > + if (unlikely(pkt->type == TRACE_TYPE_QUIT)) {
> > + l2reflect_state = S_REMOTE_TERM;
> > + free_pktbufs(pkts_burst, j, nb_rx);
> > + break;
> > + }
> > +
> > + if (likely(l2reflect_state == S_RUNNING)) {
> > + if (unlikely(!process_packet(pkt, &rx_time,
> &diff))) {
> > + l2reflect_state = S_LOCAL_TERM;
> > + free_pktbufs(pkts_burst, j, nb_rx);
> > + break;
> > + }
> > + l2reflect_simple_forward(m);
> > + }
> > + }
> > + check_ball_lost(dp_idle);
> > + }
> > +
> > + const int state_cpy = l2reflect_state;
> > + switch (state_cpy) {
> > + case S_RESET_TRX:
> > + l2reflect_send_reset();
> > + l2reflect_state = S_ELECT_LEADER;
> > + /* fallthrough */
> > + case S_ELECT_LEADER:
> > + goto restart;
> > + }
> > +
> > + if (state_cpy == S_LOCAL_TERM) {
> > + rte_eal_trace_generic_str("local termination");
> > + l2reflect_send_quit();
> > + } else if (state_cpy == S_REMOTE_TERM) {
> > + RTE_LOG(INFO, L2REFLECT, "received message that remote hit
> threshold (or is cancelled)\n");
> > + }
> > +}
> > +
> > +static int
> > +l2reflect_launch_one_lcore(__rte_unused void *dummy)
> > +{
> > + struct sched_param param;
> > + int err;
> > +
> > + if (sleep_start) {
> > + RTE_LOG(INFO, L2REFLECT, "Sleeping and waiting for
> SIGCONT\n");
> > + while (sleep_start) {
> > + rte_delay_ms(10);
> > + if (l2reflect_state == S_LOCAL_TERM)
> > + rte_exit(EXIT_SUCCESS, "Quit\n");
> > + }
> > + RTE_LOG(INFO, L2REFLECT, "Got SIGCONT, continuing");
> > + }
> > + if (l2reflect_mlock) {
> > + err = mlockall(MCL_CURRENT | MCL_FUTURE);
> > + if (err)
> > + rte_exit(EXIT_FAILURE, "mlockall failed: %s\n",
> > + strerror(errno));
> > + }
> > + if (priority > 0 || policy != SCHED_OTHER) {
> > + memset(¶m, 0, sizeof(param));
> > + param.sched_priority = priority;
> > + err = sched_setscheduler(0, policy, ¶m);
> > + if (err)
> > + rte_exit(EXIT_FAILURE,
> > + "sched_setscheduler failed: %s\n",
> > + strerror(errno));
> > + }
> > + if (l2reflect_interrupt) {
> > + err = rte_eth_dev_rx_intr_ctl_q(l2reflect_port_number,
> > + l2reflect_q,
> > + RTE_EPOLL_PER_THREAD,
> > + RTE_INTR_EVENT_ADD,
> NULL);
> > + if (err)
> > + rte_exit(EXIT_FAILURE,
> > + "could not register I/O interrupt\n");
> > + }
> > + l2reflect_main_loop();
> > + return 0;
> > +}
> > +
> > +static void
> > +sig_handler(int signum)
> > +{
> > + switch (signum) {
> > + case SIGUSR1:
> > + if (l2reflect_state == S_RUNNING)
> > + l2reflect_state = S_RESET_TRX;
> > + break;
> > + case SIGUSR2:
> > + l2reflect_output_hist = 1;
> > + break;
> > + case SIGCONT:
> > + sleep_start = 0;
> > + break;
> > + case SIGHUP:
> > + case SIGINT:
> > + l2reflect_state = S_LOCAL_TERM;
> > + break;
> > + }
> > +}
> > +
> > +int
> > +main(int argc, char **argv)
> > +{
> > + struct rte_eth_dev_info dev_info;
> > + struct rte_eth_txconf txconf;
> > + int ret;
> > + uint32_t i;
> > + uint16_t nb_ports;
> > + unsigned int lcore_id;
> > + struct sigaction action;
> > + bzero(&action, sizeof(action));
> > + char mempool_name[128];
> > + char mac_str[RTE_ETHER_ADDR_FMT_SIZE];
> > +
> > + action.sa_handler = sig_handler;
> > + if (sigaction(SIGHUP, &action, NULL) < 0 ||
> > + sigaction(SIGUSR1, &action, NULL) < 0 ||
> > + sigaction(SIGUSR2, &action, NULL) < 0 ||
> > + sigaction(SIGCONT, &action, NULL) < 0 ||
> > + sigaction(SIGINT, &action, NULL) < 0) {
> > + rte_exit(EXIT_FAILURE, "Could not register signal handler\n");
> > + }
> > +
> > + lcore_id = rte_lcore_id();
> > +
> > + if (check_opts_for_help(argc, argv, l2reflect_usage))
> > + return 0;
> > +
> > + /* init EAL */
> > + ret = rte_eal_init(argc, argv);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
> > + argc -= ret;
> > + argv += ret;
> > +
> > + /* parse application arguments (after the EAL ones) */
> > + ret = l2reflect_parse_args(argc, argv);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE, "Invalid L2REFLECT arguments\n");
> > +
> > + snprintf(mempool_name, sizeof(mempool_name), "mbuf_pool_%d",
> getpid());
> > + RTE_LOG(DEBUG, L2REFLECT, "About to create mempool \"%s\"\n",
> mempool_name);
> > + /* create the mbuf pool */
> > + l2reflect_pktmbuf_pool =
> > + rte_pktmbuf_pool_create(mempool_name, NB_MBUF,
> > + MAX_PKT_BURST, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
> > + rte_socket_id());
> > +
> > + if (l2reflect_pktmbuf_pool == NULL)
> > + rte_exit(EXIT_FAILURE,
> > + "Cannot init/find mbuf pool name %s\nError: %d
> %s\n",
> > + mempool_name, rte_errno, rte_strerror(rte_errno));
> > +
> > + nb_ports = rte_eth_dev_count_avail();
> > + if (nb_ports == 0)
> > + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
> > + if (l2reflect_port_number + 1 > nb_ports)
> > + rte_exit(EXIT_FAILURE, "Chosen port %d does not exist -
> bye\n",
> > + l2reflect_port_number);
> > + RTE_LOG(INFO, L2REFLECT, "We have %d ports and will use port %d\n",
> nb_ports,
> > + l2reflect_port_number);
> > +
> > + rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
> > + RTE_LOG(INFO, L2REFLECT, "Initializing port %u ...\n",
> l2reflect_port_number);
> > + fflush(stdout);
> > +
> > + if (l2reflect_interrupt)
> > + port_conf.intr_conf.rxq = 1;
> > +
> > + ret = config_port_max_pkt_len(&port_conf, &dev_info);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE,
> > + "Invalid max packet length: %u (port %u)\n",
> > + l2reflect_port_number, l2reflect_port_number);
> > +
> > + ret = rte_eth_dev_configure(l2reflect_port_number, 1, 1, &port_conf);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE,
> > + "Cannot configure device: err=%s, port=%u\n",
> > + strerror(-ret), l2reflect_port_number);
> > +
> > + ret = rte_eth_dev_adjust_nb_rx_tx_desc(l2reflect_port_number,
> &nb_rxd, &nb_txd);
> > + if (ret != 0)
> > + rte_exit(EXIT_FAILURE,
> > + "Cannot adjust # of Rx/Tx descriptors to HW limits:
> err=%s, port=%u\n",
> > + strerror(-ret), l2reflect_port_number);
> > +
> > + /* init RX queues */
> > + for (i = 0; i <= l2reflect_q; i++) {
> > + ret = rte_eth_rx_queue_setup(
> > + l2reflect_port_number, i, nb_rxd,
> > + rte_eth_dev_socket_id(l2reflect_port_number), NULL,
> > + l2reflect_pktmbuf_pool);
> > + if (ret < 0)
> > + rte_exit(
> > + EXIT_FAILURE,
> > + "rte_eth_rx_queue_setup:err=%s, port=%u
> q=%u\n",
> > + strerror(-ret), l2reflect_port_number, i);
> > + }
> > +
> > + /* init one TX queue on each port */
> > + txconf = dev_info.default_txconf;
> > + txconf.offloads = port_conf.txmode.offloads;
> > + ret = rte_eth_tx_queue_setup(
> > + l2reflect_port_number, 0, nb_txd,
> > + rte_eth_dev_socket_id(l2reflect_port_number), &txconf);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE,
> > + "rte_eth_tx_queue_setup:err=%s, port=%u\n",
> > + strerror(-ret), (unsigned int)l2reflect_port_number);
> > +
> > + /* Start device */
> > + ret = rte_eth_dev_start(l2reflect_port_number);
> > + if (ret < 0)
> > + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%s, port=%u\n",
> > + strerror(-ret), (unsigned int)l2reflect_port_number);
> > +
> > + rte_eth_macaddr_get(l2reflect_port_number,
> &l2reflect_port_eth_addr);
> > +
> > + /*
> > + * When running on a Tap device, we might want to use a foreign
> > + * mac address to make sure that the application and the Tap device
> > + * do not share the same MAC addr. By that, we enforce that the
> > + * bridge learns this address and correctly forwards unicast packets.
> > + */
> > + if (l2reflect_fake_mac)
> > + l2reflect_port_eth_addr.addr_bytes[5] += 1;
> > +
> > + rte_ether_format_addr(mac_str, sizeof(mac_str),
> > + &l2reflect_port_eth_addr);
> > + RTE_LOG(INFO, L2REFLECT, "Port %u, MAC address: %s\n\n",
> > + (unsigned int)l2reflect_port_number, mac_str);
> > +
> > + /*
> > + * in quiet mode the primary executes the main packet loop
> > + * otherwise the one worker does it and the primary prints stats
> > + */
> > + if (quiet) {
> > + assert(rte_lcore_count() == 1);
> > +#ifdef HAS_SYS_IO
> > + if (disable_int) {
> > + iopl(3);
> > + asm("cli");
> > + }
> > +#endif
> > + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n", getpid(),
> getppid());
> > + l2reflect_launch_one_lcore(NULL);
> > + } else {
> > + assert(rte_lcore_count() == 2);
> > + /* the worker reflects the packets */
> > + RTE_LCORE_FOREACH_WORKER(lcore_id)
> > + {
> > + rte_eal_remote_launch(l2reflect_launch_one_lcore,
> NULL,
> > + lcore_id);
> > + }
> > +
> > + /* the primary prints the stats */
> > + init_record();
> > + l2reflect_stats_loop();
> > + rte_eal_mp_wait_lcore();
> > + }
> > + rte_eal_cleanup();
> > +
> > + if (l2reflect_hist)
> > + output_histogram_snapshot();
> > +
> > + cleanup_record();
> > +
> > + return 0;
> > +}
> > diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
> > new file mode 100644
> > index 0000000000..14b154ef06
> > --- /dev/null
> > +++ b/app/l2reflect/meson.build
> > @@ -0,0 +1,21 @@
> > +# SPDX-License-Identifier: BSD-3-Clause
> > +# Copyright(c) 2022 Siemens AG
> > +
> > +cc = meson.get_compiler('c')
> > +
> > +jansson = dependency('libjansson', required: false)
> > +
> > +if not jansson.found()
> > + jansson = cc.find_library('jansson', required: false)
> > +endif
> > +
> > +if cc.has_header('sys/io.h')
> > + cflags += '-DHAS_SYS_IO'
> > +endif
> > +
> > +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
> > +deps += ['ethdev']
>
> I think some dependecies are missing:
> ring, mbuf, mempool
>
> > +if jansson.found()
> > + ext_deps += jansson
> > + cflags += '-DRTE_HAS_JANSSON'
> > +endif
> > diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
> > new file mode 100644
> > index 0000000000..c1fae5d5e4
> > --- /dev/null
> > +++ b/app/l2reflect/payload.h
> > @@ -0,0 +1,26 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Siemens AG
> > + */
> > +#include <rte_ether.h>
> > +
> > +#ifndef _L2REFLECT_PAYLOAD_H_
> > +#define _L2REFLECT_PAYLOAD_H_
> > +
> > +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
> > +/* IEEE Std 802 - Local Experimental Ethertype */
> > +#define ETHER_TYPE_L2REFLECT 0x88B5
> > +
> > +struct my_magic_packet {
> > + /* l2 packet header */
> > + struct rte_ether_hdr eth;
> > + /* type of the l2reflect packet */
> > + uint8_t type;
> > + /* magic easy-to-spot pattern for tracing */
> > + uint64_t magic;
> > + /* break if latency is larger than this */
> > + uint64_t breakval;
> > + /* intended size of the packet */
> > + uint64_t req_pkt_bytes;
> > +};
> > +
> > +#endif /* _L2REFLECT_PAYLOAD_H_ */
> > diff --git a/app/l2reflect/stats.c b/app/l2reflect/stats.c
> > new file mode 100644
> > index 0000000000..6bcbb7a2bf
> > --- /dev/null
> > +++ b/app/l2reflect/stats.c
> > @@ -0,0 +1,225 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +#include <stdio.h>
> > +#include <sys/types.h>
> > +#include <unistd.h>
> > +#include <time.h>
> > +#include <string.h>
> > +#ifdef RTE_HAS_JANSSON
> > +#include <jansson.h>
> > +#endif
> > +#include "colors.h"
> > +#include "stats.h"
> > +
> > +unsigned int hist_bucket_usec;
> > +struct stats record;
> > +char *hist_filename;
> > +
> > +void
> > +init_record(void)
> > +{
> > + record.max_round_ns = 0;
> > + record.min_round_ns = MIN_INITIAL;
> > + record.rounds = 0;
> > + record.timeouts = 0;
> > + record.avg_round_ns = 0;
> > + if (l2reflect_hist) {
> > + if (!record.hist_size) {
> > + record.hist =
> > + calloc(l2reflect_hist_buckets, sizeof(uint64_t));
> > + record.hist_size = l2reflect_hist_buckets;
> > + } else {
> > + memset(record.hist, 0,
> > + record.hist_size * sizeof(uint64_t));
> > + }
> > + }
> > + clock_gettime(CLOCK_MONOTONIC, &record.time_start);
> > +}
> > +
> > +void
> > +cleanup_record(void)
> > +{
> > + if (l2reflect_hist) {
> > + free(record.hist);
> > + record.hist = NULL;
> > + record.hist_size = 0;
> > + }
> > +}
> > +
> > +void
> > +output_histogram_snapshot(void)
> > +{
> > + char *json = serialize_histogram(&record);
> > + FILE *fd = stderr;
> > + if (hist_filename)
> > + fd = fopen(hist_filename, "w");
> > + fputs(json, fd);
> > + fputs("\n", fd);
> > + free(json);
> > + if (hist_filename)
> > + fclose(fd);
> > +}
> > +
> > +void
> > +print_stats(void)
> > +{
> > + const char clr[] = { 27, '[', '2', 'J', '\0' };
> > + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
> > + const uint64_t bytes_in_gib = 0x40000000;
> > + struct rte_eth_stats stats;
> > + char mac_str_me[RTE_ETHER_ADDR_FMT_SIZE];
> > + char mac_str_remote[RTE_ETHER_ADDR_FMT_SIZE];
> > + char timeout_bound_str[32];
> > + const char *str_jumbo = l2reflect_pkt_bytes > RTE_ETHER_MTU ?
> > + "jumbo" : "no-jumbo";
> > + struct timespec time_now;
> > +
> > + if (l2reflect_interrupt == 0)
> > + snprintf(timeout_bound_str, sizeof(timeout_bound_str), "
> polling");
> > + else if (l2reflect_sleep_msec == -1u)
> > + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">=
> infinite");
> > + else
> > + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">%9"
> PRIu64 "ms",
> > + l2reflect_sleep_msec);
> > +
> > + rte_eth_stats_get(l2reflect_port_number, &stats);
> > + rte_ether_format_addr(mac_str_me, sizeof(mac_str_me),
> > + &l2reflect_port_eth_addr);
> > + rte_ether_format_addr(mac_str_remote, sizeof(mac_str_remote),
> > + &l2reflect_remote_eth_addr);
> > +
> > + clock_gettime(CLOCK_MONOTONIC, &time_now);
> > + const uint64_t time_since_start = time_now.tv_sec -
> record.time_start.tv_sec;
> > +
> > + /* Clear screen and move to top left */
> > + printf("%s%s", clr, topLeft);
> > +
> > + printf("%sNetworking Roundtrip Test%s\n", colors->green, colors-
> >reset);
> > + printf("\n%sPort statistics
> ====================================%s",
> > + colors->magenta, colors->reset);
> > +
> > + printf("\nMe: %s <--> Remote: %s", mac_str_me, mac_str_remote);
> > + printf("\nStatistics for port %d PID %d on lcore %d ---------"
> > + "\nState: %-16s %10" PRIu64 " s"
> > + "\nPackets tx: %22" PRIu64 "\nPackets rx: %22" PRIu64
> > + "\nBytes tx: %24" PRIu64 " (%8.2f GiB)"
> > + "\nBytes rx: %24" PRIu64 " (%8.2f GiB)"
> > + "\nErrors tx: %23" PRIu64 "\nErrors rx: %23" PRIu64
> > + "\nTimeouts rx: %21" PRIu64 " (%s)"
> > + "\nPacketsize [Byte]: %15" PRIu64 " (%12s)",
> > + l2reflect_port_number, getpid(), rte_lcore_id(),
> > + runstate_tostring(l2reflect_state),
> > + time_since_start,
> > + stats.opackets, stats.ipackets, stats.obytes,
> > + (double)stats.obytes / bytes_in_gib, stats.ibytes,
> > + (double)stats.ibytes / bytes_in_gib, stats.oerrors,
> > + stats.ierrors, record.timeouts,
> > + timeout_bound_str, l2reflect_pkt_bytes, str_jumbo);
> > + printf("\n%sPort timing statistics
> =============================%s",
> > + colors->magenta, colors->reset);
> > + if (l2reflect_state == S_ELECT_LEADER ||
> > + record.min_round_ns == MIN_INITIAL) {
> > + printf("\n\nBenchmark not started yet...\n");
> > + } else {
> > + printf("\n%sMax%s roundtrip: %19" PRIu64 " us", colors->red,
> > + colors->reset, record.max_round_ns / 1000);
> > + printf("\n%sAvg%s roundtrip: %19" PRIu64 " us", colors-
> >yellow,
> > + colors->reset,
> > + record.rounds ? (uint64_t)(record.avg_round_ns /
> > + record.rounds / 1000) :
> > + 0);
> > + printf("\n%sMin%s roundtrip: %19" PRIu64 " us", colors->green,
> > + colors->reset, record.min_round_ns / 1000);
> > + }
> > +
> printf("\n%s================================================
> ====%s\n",
> > + colors->magenta, colors->reset);
> > +}
> > +
> > +void
> > +l2reflect_stats_loop(void)
> > +{
> > + while (!(l2reflect_state & (S_LOCAL_TERM | S_REMOTE_TERM))) {
> > + print_stats();
> > + if (l2reflect_hist && l2reflect_output_hist) {
> > + output_histogram_snapshot();
> > + l2reflect_output_hist = 0;
> > + }
> > + rte_delay_us_sleep(1000000);
> > + }
> > +}
> > +
> > +char *
> > +serialize_histogram(__rte_unused const struct stats *record)
> > +{
> > +#ifndef RTE_HAS_JANSSON
> > + return strdup("to print histogram, build with jansson support");
> > +#else
> > + char *str = NULL;
> > + char key[8];
> > + unsigned int i;
> > + json_t *hist0, *cpu0, *all_cpus, *output;
> > +
> > + output = json_object();
> > + /* version: 1 */
> > + json_object_set_new(output, "version", json_integer(1));
> > +
> > + /* cpu 0 histogram */
> > + hist0 = json_object();
> > + for (i = 0; i < record->hist_size; ++i) {
> > + /* only log positive numbers to meet jitterplot format */
> > + if (record->hist[i] != 0) {
> > + snprintf(key, 8, "%u", i * hist_bucket_usec);
> > + json_object_set(hist0, key,
> > + json_integer(record->hist[i]));
> > + }
> > + }
> > +
> > + /* in case of empty histogram, set these values to zero */
> > + const json_int_t min_round_us =
> > + record->rounds ? record->min_round_ns / 1000 : 0;
> > + const json_int_t avg_round_us =
> > + record->rounds ? record->avg_round_ns / record->rounds /
> 1000 : 0;
> > + const json_int_t max_round_us =
> > + record->rounds ? record->max_round_ns / 1000 : 0;
> > +
> > + /* cpu 0 stats */
> > + cpu0 = json_object();
> > + json_object_set_new(cpu0, "histogram", hist0);
> > + json_object_set_new(cpu0, "count", json_integer(record->rounds));
> > + json_object_set_new(cpu0, "min", json_integer(min_round_us));
> > + json_object_set_new(cpu0, "max", json_integer(max_round_us));
> > + json_object_set_new(cpu0, "avg", json_integer(avg_round_us));
> > +
> > + /* combine objects */
> > + all_cpus = json_object();
> > + json_object_set_new(all_cpus, "0", cpu0);
> > + json_object_set_new(output, "cpu", all_cpus);
> > +
> > + str = json_dumps(output, JSON_ENSURE_ASCII | JSON_INDENT(2));
> > +
> > + /* cleanup */
> > + json_decref(output);
> > +
> > + return str;
> > +#endif
> > +}
> > +
> > +const char *
> > +runstate_tostring(int s)
> > +{
> > + switch (s) {
> > + case S_ELECT_LEADER:
> > + return "Electing";
> > + case S_RESET_TRX:
> > + return "Resetting";
> > + case S_RUNNING:
> > + return "Running";
> > + case S_LOCAL_TERM:
> > + return "Term. local";
> > + case S_REMOTE_TERM:
> > + return "Term. remote";
> > + default:
> > + return "Preparing";
> > + }
> > +}
> > diff --git a/app/l2reflect/stats.h b/app/l2reflect/stats.h
> > new file mode 100644
> > index 0000000000..7f3dd9fffb
> > --- /dev/null
> > +++ b/app/l2reflect/stats.h
> > @@ -0,0 +1,67 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Siemens AG
> > + */
> > +#ifndef _L2REFLECT_STATS_H_
> > +#define _L2REFLECT_STATS_H_
> > +#include <stdint.h>
> > +#include <stdatomic.h>
> > +#include <limits.h>
> > +
> > +#include <rte_ethdev.h>
> > +
> > +#include "l2reflect.h"
> > +
> > +#define MIN_INITIAL ULONG_MAX
> > +#define HIST_NUM_BUCKETS_DEFAULT 128
> > +
> > +/* runtime statistics */
> > +struct stats {
> > + uint64_t max_round_ns;
> > + uint64_t min_round_ns;
> > + uint64_t rounds;
> > + uint64_t timeouts;
> > + double avg_round_ns;
> > + unsigned int hist_size;
> > + /* each slot is 10us */
> > + uint64_t *hist;
> > + struct timespec time_start;
> > +};
> > +
> > +/* size of each histogram bucket in usec */
> > +extern unsigned int hist_bucket_usec;
> > +extern struct stats record;
> > +extern char *hist_filename;
> > +
> > +void
> > +init_record(void);
> > +void
> > +cleanup_record(void);
> > +
> > +void
> > +l2reflect_stats_loop(void);
> > +
> > +/*
> > + * Write the histogram to file / stdio without any locking.
> > + * When called during the measurement, values are approximations
> > + * (racy reads).
> > + */
> > +void
> > +output_histogram_snapshot(void);
> > +
> > +/* Print out statistics on packets dropped */
> > +void
> > +print_stats(void);
> > +
> > +/*
> > + * get a JSON representation of the record
> > + */
> > +char *
> > +serialize_histogram(const struct stats *record);
> > +
> > +/*
> > + * get a string representation of the current runstate
> > + */
> > +const char *
> > +runstate_tostring(int s);
> > +
> > +#endif /* _L2REFLECT_STATS_H_ */
> > diff --git a/app/l2reflect/utils.c b/app/l2reflect/utils.c
> > new file mode 100644
> > index 0000000000..4116b986d2
> > --- /dev/null
> > +++ b/app/l2reflect/utils.c
> > @@ -0,0 +1,67 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2021 Siemens AG
> > + */
> > +
> > +#include <rte_ethdev.h>
> > +#include <rte_log.h>
> > +#include <rte_ethdev.h>
> > +
> > +#include "utils.h"
> > +#include "l2reflect.h"
> > +
> > +void
> > +assert_link_status(int port_id)
> > +{
> > + struct rte_eth_link link;
> > + uint8_t rep_cnt = MAX_REPEAT_TIMES;
> > + int link_get_err = -EINVAL;
> > +
> > + memset(&link, 0, sizeof(link));
> > + do {
> > + link_get_err = rte_eth_link_get_nowait(port_id, &link);
> > + if (link_get_err == 0 && link.link_status == RTE_ETH_LINK_UP)
> > + break;
> > + rte_delay_ms(CHECK_INTERVAL);
> > + RTE_LOG(INFO, L2REFLECT, "Link not ready yet, try again...\n");
> > + } while (--rep_cnt && (l2reflect_state != S_LOCAL_TERM));
> > +
> > + if (link_get_err < 0)
> > + rte_exit(EXIT_FAILURE, "error: link get is failing: %s\n",
> > + rte_strerror(-link_get_err));
> > + if (link.link_status == RTE_ETH_LINK_DOWN)
> > + rte_exit(EXIT_FAILURE, "error: link is still down\n");
> > +
> > + const char *linkspeed_str = rte_eth_link_speed_to_str(link.link_speed);
> > + RTE_LOG(INFO, L2REFLECT,
> > + "Link status on port %d: speed: %s, duplex: %s\n",
> > + port_id, linkspeed_str,
> > + link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX ? "full" :
> "half");
> > +}
> > +
> > +uint32_t
> > +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> > +{
> > + uint32_t overhead_len;
> > + if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> > + overhead_len = max_rx_pktlen - max_mtu;
> > + else
> > + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> > + return overhead_len;
> > +}
> > +
> > +int
> > +config_port_max_pkt_len(struct rte_eth_conf *conf,
> > + struct rte_eth_dev_info
> *dev_info)
> > +{
> > + uint32_t overhead_len;
> > + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
> > + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN)
> > + return -1;
> > + overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
> > + dev_info->max_mtu);
> > + conf->rxmode.mtu = MAX(l2reflect_pkt_bytes - overhead_len,
> > + dev_info->min_mtu);
> > + if (conf->rxmode.mtu > RTE_ETHER_MTU)
> > + conf->txmode.offloads |=
> RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
> > + return 0;
> > +}
> > diff --git a/app/l2reflect/utils.h b/app/l2reflect/utils.h
> > new file mode 100644
> > index 0000000000..177ad8cda6
> > --- /dev/null
> > +++ b/app/l2reflect/utils.h
> > @@ -0,0 +1,20 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2021 Siemens AG
> > + */
> > +
> > +#ifndef _L2REFLECT_UTILS_H_
> > +#define _L2REFLECT_UTILS_H_
> > +
> > +#define MAX_REPEAT_TIMES 30
> > +#define CHECK_INTERVAL 2000
> > +
> > +void assert_link_status(int port_id);
> > +
> > +uint32_t
> > +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu);
> > +
> > +int
> > +config_port_max_pkt_len(struct rte_eth_conf *conf,
> > + struct rte_eth_dev_info *dev_info);
> > +
> > +#endif /* _L2REFLECT_UTILS_H_ */
> > diff --git a/app/meson.build b/app/meson.build
> > index 0ea04cadeb..3593322ab2 100644
> > --- a/app/meson.build
> > +++ b/app/meson.build
> > @@ -3,6 +3,7 @@
> >
> > apps = [
> > 'dumpcap',
> > + 'l2reflect',
> > 'pdump',
> > 'proc-info',
> > 'test-acl',
Hi Felix,
On 9/25/22 10:01, Moessbauer, Felix wrote:
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Tuesday, September 20, 2022 10:02 PM
>> To: Moessbauer, Felix (T CED SES-DE) <felix.moessbauer@siemens.com>;
>> dev@dpdk.org
>> Cc: Schild, Henning (T CED SES-DE) <henning.schild@siemens.com>; Kiszka, Jan
>> (T CED) <jan.kiszka@siemens.com>; thomas@monjalon.net; Marcelo Tosatti
>> <mtosatti@redhat.com>
>> Subject: Re: [PATCH v6 2/2] Add l2reflect measurement application
>>
>> Hi Felix,
>>
>> First, I support the idea of having the l2reflect application part of
>> the DPDK repository.
>
> Hi Maxime,
>
> Many thanks for the review.
> I'm currently travelling but plan to address all responses regarding l2reflect in the upcoming week.
Do you still target v22.11 for getting l2reflect in?
Regards,
Maxime
> Best regards,
> Felix
>
>>
>> Please note CI failed to build it on different platforms:
>> https://eur01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fmails.dpd
>> k.org%2Farchives%2Ftest-report%2F2022-
>> September%2F304617.html&data=05%7C01%7Cfelix.moessbauer%40siem
>> ens.com%7Cdd1323af309a42d0a12b08da9b10a598%7C38ae3bcd95794fd4adda
>> b42e1495d55a%7C1%7C0%7C637992793049685048%7CUnknown%7CTWFpbGZ
>> sb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0
>> %3D%7C3000%7C%7C%7C&sdata=eM6SKiinN6Jc%2Fb1lAYpY2TX1x7hxMzA
>> 3iGetLzRzL0g%3D&reserved=0
>>
>> It also fails to build on my Fc35 machine:
>> [3237/3537] Compiling C object app/dpdk-l2reflect.p/l2reflect_main.c.o
>> ../app/l2reflect/main.c: In function 'l2reflect_main_loop':
>> ../app/l2reflect/main.c:560:19: warning: array subscript 'uint64_t {aka
>> long unsigned int}[0]' is partly outside array bounds of 'struct
>> rte_ether_addr[1]' [-Warray-bounds]
>> 560 | i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) &
>> MAC_ADDR_CMP) >
>> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> ../app/l2reflect/main.c:110:23: note: while referencing
>> 'l2reflect_port_eth_addr'
>> 110 | struct rte_ether_addr l2reflect_port_eth_addr;
>> | ^~~~~~~~~~~~~~~~~~~~~~~
>> ../app/l2reflect/main.c:561:27: warning: array subscript 'uint64_t {aka
>> long unsigned int}[0]' is partly outside array bounds of 'struct
>> rte_ether_addr[1]' [-Warray-bounds]
>> 561 | (*((uint64_t
>> *)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));
>> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> ../app/l2reflect/main.c:111:23: note: while referencing
>> 'l2reflect_remote_eth_addr'
>> 111 | struct rte_ether_addr l2reflect_remote_eth_addr;
>> | ^~~~~~~~~~~~~~~~~
>>
>> Some more comments inline:
>>
>> On 9/2/22 10:45, Felix Moessbauer wrote:
>>> The l2reflect application implements a ping-pong benchmark to
>>> measure the latency between two instances. For communication,
>>> we use raw ethernet and send one packet at a time. The timing data
>>> is collected locally and min/max/avg values are displayed in a TUI.
>>> Finally, a histogram of the latencies is printed which can be
>>> further processed with the jitterdebugger visualization scripts.
>>> To debug latency spikes, a max threshold can be defined.
>>> If it is hit, a trace point is created on both instances.
>>>
>>> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
>>> Signed-off-by: Henning Schild <henning.schild@siemens.com>
>>> ---
>>> app/l2reflect/colors.c | 34 ++
>>> app/l2reflect/colors.h | 19 +
>>> app/l2reflect/l2reflect.h | 53 ++
>>> app/l2reflect/main.c | 1007 +++++++++++++++++++++++++++++++++++++
>>> app/l2reflect/meson.build | 21 +
>>> app/l2reflect/payload.h | 26 +
>>> app/l2reflect/stats.c | 225 +++++++++
>>> app/l2reflect/stats.h | 67 +++
>>> app/l2reflect/utils.c | 67 +++
>>> app/l2reflect/utils.h | 20 +
>>> app/meson.build | 1 +
>>> 11 files changed, 1540 insertions(+)
>>> create mode 100644 app/l2reflect/colors.c
>>> create mode 100644 app/l2reflect/colors.h
>>> create mode 100644 app/l2reflect/l2reflect.h
>>> create mode 100644 app/l2reflect/main.c
>>> create mode 100644 app/l2reflect/meson.build
>>> create mode 100644 app/l2reflect/payload.h
>>> create mode 100644 app/l2reflect/stats.c
>>> create mode 100644 app/l2reflect/stats.h
>>> create mode 100644 app/l2reflect/utils.c
>>> create mode 100644 app/l2reflect/utils.h
>>
>> If we agree to have this application in app/ directory,
>> I think you'll have to add documentation for this new tool in
>> doc/guides/tools/.
>>
>>> diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
>>> new file mode 100644
>>> index 0000000000..af881d8788
>>> --- /dev/null
>>> +++ b/app/l2reflect/colors.c
>>> @@ -0,0 +1,34 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + */
>>> +
>>> +#include "colors.h"
>>> +
>>> +const struct color_palette *colors;
>>> +
>>> +static const struct color_palette color_palette_default = {
>>> + .red = "\x1b[01;31m",
>>> + .green = "\x1b[01;32m",
>>> + .yellow = "\x1b[01;33m",
>>> + .blue = "\x1b[01;34m",
>>> + .magenta = "\x1b[01;35m",
>>> + .cyan = "\x1b[01;36m",
>>> + .reset = "\x1b[0m"
>>> +};
>>> +
>>> +static const struct color_palette color_palette_bw = { .red = "",
>>> + .green = "",
>>> + .yellow = "",
>>> + .blue = "",
>>> + .magenta = "",
>>> + .cyan = "",
>>> + .reset = "" };
>>> +
>>> +void
>>> +enable_colors(int enable)
>>> +{
>>> + if (enable)
>>> + colors = &color_palette_default;
>>> + else
>>> + colors = &color_palette_bw;
>>> +}
>>> diff --git a/app/l2reflect/colors.h b/app/l2reflect/colors.h
>>> new file mode 100644
>>> index 0000000000..346547138b
>>> --- /dev/null
>>> +++ b/app/l2reflect/colors.h
>>> @@ -0,0 +1,19 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + */
>>> +#ifndef _L2REFLECT_COLORS_H_
>>> +#define _L2REFLECT_COLORS_H_
>>> +
>>> +/* posix terminal colors */
>>> +struct color_palette {
>>> + const char *red, *green, *yellow, *blue, *magenta, *cyan, *reset;
>>> +};
>>> +
>>> +/* ptr to the current tui color palette */
>>> +extern const struct color_palette *colors;
>>> +
>>> +/* disable colored output */
>>> +void
>>> +enable_colors(int enable);
>>> +
>>> +#endif /* _L2REFLECT_COLORS_H_ */
>>> diff --git a/app/l2reflect/l2reflect.h b/app/l2reflect/l2reflect.h
>>> new file mode 100644
>>> index 0000000000..922bd7c281
>>> --- /dev/null
>>> +++ b/app/l2reflect/l2reflect.h
>>> @@ -0,0 +1,53 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + */
>>> +
>>> +#include <stdatomic.h>
>>> +
>>> +#ifndef _L2REFLECT_L2REFLECT_H_
>>> +#define _L2REFLECT_L2REFLECT_H_
>>> +
>>> +#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
>>> +
>>> +/* max size that common 1G NICs support */
>>> +#define MAX_JUMBO_PKT_LEN 9600
>>> +
>>> +/* Used to compare MAC addresses. */
>>> +#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
>>> +
>>> +#define MAX(x, y) (((x) > (y)) ? (x) : (y))
>>> +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
>>> +
>>> +enum {
>>> + TRACE_TYPE_DATA,
>>> + TRACE_TYPE_HELO,
>>> + TRACE_TYPE_EHLO,
>>> + TRACE_TYPE_RSET,
>>> + TRACE_TYPE_QUIT,
>>> +};
>>> +
>>> +enum STATE {
>>> + /* elect the initial sender */
>>> + S_ELECT_LEADER = 1,
>>> + /* reset the counters */
>>> + S_RESET_TRX = 2,
>>> + /* measurement S_RUNNING */
>>> + S_RUNNING = 4,
>>> + /* terminated by local event */
>>> + S_LOCAL_TERM = 8,
>>> + /* terminated by remote event */
>>> + S_REMOTE_TERM = 16
>>> +};
>>> +
>>> +extern int l2reflect_hist;
>>> +extern unsigned int l2reflect_hist_buckets;
>>> +extern atomic_int l2reflect_output_hist;
>>> +extern int l2reflect_interrupt;
>>> +extern uint64_t l2reflect_sleep_msec;
>>> +extern uint64_t l2reflect_pkt_bytes;
>>> +extern uint16_t l2reflect_port_number;
>>> +extern atomic_int l2reflect_state;
>>> +extern struct rte_ether_addr l2reflect_port_eth_addr;
>>> +extern struct rte_ether_addr l2reflect_remote_eth_addr;
>>> +
>>> +#endif /* _L2REFLECT_L2REFLECT_H_ */
>>> diff --git a/app/l2reflect/main.c b/app/l2reflect/main.c
>>> new file mode 100644
>>> index 0000000000..33a87e8fad
>>> --- /dev/null
>>> +++ b/app/l2reflect/main.c
>>> @@ -0,0 +1,1007 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + *
>>> + * The l2reflect application implements a ping-pong benchmark to
>>> + * measure the latency between two instances. For communication,
>>> + * we use raw ethernet and send one packet at a time. The timing data
>>> + * is collected locally and min/max/avg values are displayed in a TUI.
>>> + * Finally, a histogram of the latencies is printed which can be
>>> + * further processed with the jitterdebugger visualization scripts.
>>> + * To debug latency spikes, a max threshold can be defined.
>>> + * If it is hit, a trace point is created on both instances.
>>> + *
>>> + * Examples:
>>> + * launch (non-rt kernel): l2reflect --lcores 0@0,1@6 -n 1
>>> + * launch (rt kernel): l2reflect --lcores 0@0,1@6 -n 1 -- -P 50 -r -l
>>> + *
>>> + * For histogram data, launch with -H <usec> -F <output file>, e.g.
>>> + * -H 10 -F histogram.json for a histogram with 10 usec buckets which
>>> + * is written to a histogram.json file. This file can then be visualized
>>> + * using the jitterdebugger plotting scripts:
>>> + * jitterplot hist histogram.json
>>> + *
>>> + * While the application is running, it can be controlled by sending
>>> + * signals to one of the processes:
>>> + * - SIGUSR1: reset the min/max/avg on both instances
>>> + * - SIGUSR2: output / write the current histogram
>>> + * - SIGHUP/SIGINT: gracefully terminate both instances
>>> + *
>>> + * Note on wiring:
>>> + * The l2reflect application sends the packets via a physical ethernet
>>> + * interface. When running both instances on a single system, at least
>>> + * two dedicated physical ports and a (physical) loopback between them
>>> + * is required.
>>
>> Above text could be used as a basis for the documentation.
>>
>>> + */
>>> +
>>> +#include <stdio.h>
>>> +#include <errno.h>
>>> +#include <stdlib.h>
>>> +#include <string.h>
>>> +#include <stdint.h>
>>> +#include <time.h>
>>> +#include <inttypes.h>
>>> +#include <getopt.h>
>>> +#include <sys/signal.h>
>>> +#include <assert.h>
>>> +#include <unistd.h>
>>> +#ifdef HAS_SYS_IO
>>> +#include <sys/io.h>
>>> +#endif
>>> +#include <sched.h>
>>> +#include <sys/mman.h>
>>> +#include <stdatomic.h>
>>> +
>>> +#include <rte_common.h>
>>> +#include <rte_errno.h>
>>> +#include <rte_log.h>
>>> +#include <rte_memory.h>
>>> +#include <rte_memcpy.h>
>>> +#include <rte_memzone.h>
>>> +#include <rte_eal.h>
>>> +#include <rte_eal_trace.h>
>>> +#include <rte_per_lcore.h>
>>> +#include <rte_launch.h>
>>> +#include <rte_atomic.h>
>>> +#include <rte_cycles.h>
>>> +#include <rte_prefetch.h>
>>> +#include <rte_lcore.h>
>>> +#include <rte_per_lcore.h>
>>> +#include <rte_branch_prediction.h>
>>> +#include <rte_interrupts.h>
>>> +#include <rte_random.h>
>>> +#include <rte_debug.h>
>>> +#include <rte_ether.h>
>>> +#include <rte_ethdev.h>
>>> +#include <rte_ring.h>
>>> +#include <rte_mempool.h>
>>> +#include <rte_mbuf.h>
>>> +
>>> +#include "l2reflect.h"
>>> +#include "payload.h"
>>> +#include "utils.h"
>>> +#include "colors.h"
>>> +#include "stats.h"
>>> +
>>> +#define NSEC_PER_SEC 1000000000
>>> +
>>> +#define NB_MBUF 2047
>>> +
>>> +#define MAX_PKT_BURST 32
>>> +/* warmup a few round before starting the measurement */
>>> +#define WARMUP_ROUNDS 42
>>
>> Would it make sense to have this as default value and provide the user
>> with the possibility to configure it via command line?
>>
>>> +
>>> +/* break after one second */
>>> +#define DEFAULT_BREAKVAL_USEC 1000000ull
>>> +/* break if no rx for more than this rounds */
>>> +#define RX_TIMEOUT_MASK ~0xFFFFFull
>>> +
>>> +/* delay between two election packets */
>>> +#define DELAY_ELECTION_MS 500
>>> +
>>> +int l2reflect_hist;
>>> +unsigned int l2reflect_hist_buckets = HIST_NUM_BUCKETS_DEFAULT;
>>> +atomic_int l2reflect_output_hist;
>>> +int l2reflect_fake_mac;
>>> +int l2reflect_interrupt;
>>> +uint64_t l2reflect_sleep_msec;
>>> +uint64_t l2reflect_pkt_bytes = 64;
>>> +uint16_t l2reflect_port_number;
>>> +atomic_int l2reflect_state;
>>> +struct rte_ether_addr l2reflect_port_eth_addr;
>>> +struct rte_ether_addr l2reflect_remote_eth_addr;
>>> +
>>> +static struct timespec last_sent, last_recv;
>>> +static int quiet, disable_int, priority, policy, l2reflect_mlock;
>>> +
>>> +static atomic_int sleep_start;
>>> +static uint64_t rounds;
>>> +
>>> +/* Configurable number of RX/TX ring descriptors */
>>> +#define RTE_TEST_RX_DESC_DEFAULT 128
>>> +#define RTE_TEST_TX_DESC_DEFAULT 128
>>> +static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
>>> +static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>>> +
>>> +static struct rte_eth_conf port_conf = {
>>> + .rxmode = {
>>> + .split_hdr_size = 0,
>>> + },
>>> + .txmode = {
>>> + .mq_mode = RTE_ETH_MQ_TX_NONE,
>>> + },
>>> +};
>>> +
>>> +static uint32_t l2reflect_q;
>>> +static uint64_t l2reflect_break_usec = DEFAULT_BREAKVAL_USEC;
>>> +
>>> +static struct rte_ether_addr ether_bcast_addr = {
>>> + .addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
>>> +};
>>> +
>>> +struct rte_mempool *l2reflect_pktmbuf_pool;
>>> +
>>> +static void
>>> +l2reflect_usage(const char *prgname)
>>> +{
>>> + printf("%s [EAL options] -- [-p PORT] -P [PRIO] [-b USEC] [-n SIZE] [-r] [-
>> f] [-l]"
>>> + "[-q] [-d] [-H USEC] [-B NUM] [-F FILE] [-S] [-i MSEC] [-m] [-c] [-
>> h]\n"
>>> + " -p PORT: port to configure\n"
>>> + " -P PRIO: scheduling priority to use\n"
>>> + " -b USEC: break when latency > USEC\n"
>>> + " -n SIZE: size of packet in bytes [%i,%i]\n"
>>> + " (when using jumbo frames, sender and receiver values have
>> to match)\n"
>>> + " -r: scheduling policy SCHED_RR\n"
>>> + " -f: scheduling policy SCHED_FIFO\n"
>>> + " -l: lock memory (mlockall)\n"
>>> + " -q: quiet, do not print stats\n"
>>> +#ifdef HAS_SYS_IO
>>> + " -d: ignore maskable interrupts\n"
>>> +#endif
>>> + " -H USEC: create histogram of latencies with USEC time slices\n"
>>> + " -B NUM: number of histogram buckets\n"
>>> + " -F FILE: write histogram to file\n"
>>> + " -S: start processing threads in sleep, wake with SIGCONT\n"
>>> + " -i MSEC: use interrupts instead of polling (cont. on interrupt or
>> after MSEC)\n"
>>> + " -m: fake the source mac addr by adding 1 to the last tuple\n"
>>> + " -c: disable colored output\n"
>>> + " -h: display help message\n",
>>> + prgname, RTE_ETHER_MIN_LEN, MAX_JUMBO_PKT_LEN);
>>> +}
>>> +
>>> +static int
>>> +check_opts_for_help(int argc, char **argv, void(*display_help)(const char *))
>>> +{
>>> + if (argc > 2 && !strncmp(argv[1], "--", 3)) {
>>> + if (!strncmp(argv[2], "-h", 3) || !strncmp(argv[2], "--help", 7)) {
>>> + display_help(argv[0]);
>>> + return 1;
>>> + }
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +/* Parse the argument given in the command line of the application */
>>> +static int
>>> +l2reflect_parse_args(int argc, char **argv)
>>> +{
>>> + int opt, ret;
>>> + char **argvopt;
>>> + int option_index;
>>> + int opt_colors = 1;
>>> + char *prgname = argv[0];
>>> + static struct option lgopts[] = { { NULL, 0, 0, 0 } };
>>> +
>>> + argvopt = argv;
>>> + policy = SCHED_OTHER;
>>> + hist_filename = NULL;
>>> + l2reflect_output_hist = 0;
>>> +
>>> + while ((opt = getopt_long(argc, argvopt, "p:P:b:H:B:F:i:n:qdrflScm",
>> lgopts,
>>> + &option_index)) != EOF) {
>>> + switch (opt) {
>>> + /* port */
>>> + case 'p':
>>> + l2reflect_port_number =
>>> + (uint16_t)strtoul(optarg, NULL, 10);
>>> + break;
>>> + case 'P':
>>> + priority = strtoul(optarg, NULL, 10);
>>> + if (priority > 0) {
>>> + if (policy == SCHED_OTHER)
>>> + policy = SCHED_RR;
>>> + l2reflect_mlock = 1;
>>> + }
>>> + break;
>>> + case 'b':
>>> + l2reflect_break_usec = strtoul(optarg, NULL, 10);
>>> + break;
>>> + case 'S':
>>> + sleep_start = 1;
>>> + break;
>>> + case 'q':
>>> + quiet = 1;
>>> + break;
>>> + case 'd':
>>> + disable_int = 1;
>>> + break;
>>> + case 'r':
>>> + policy = SCHED_RR;
>>> + break;
>>> + case 'f':
>>> + policy = SCHED_FIFO;
>>> + break;
>>> + case 'l':
>>> + l2reflect_mlock = 1;
>>> + break;
>>> + case 'H':
>>> + l2reflect_hist = 1;
>>> + hist_bucket_usec = strtoul(optarg, NULL, 10);
>>> +#ifndef RTE_HAS_JANSSON
>>> + printf("not compiled with cjson support\n");
>>
>> s/cjson/jansson/
>>
>>> + return -1;
>>> +#endif
>>> + break;
>>> + case 'B':
>>> + l2reflect_hist_buckets = strtoul(optarg, NULL, 10);
>>> + break;
>>> + case 'F':
>>> + hist_filename = strndup(optarg, 128);
>>> + break;
>>> + case 'i':
>>> + l2reflect_interrupt = 1;
>>> + l2reflect_sleep_msec = strtoul(optarg, NULL, 10);
>>> + break;
>>> + case 'n':
>>> + l2reflect_pkt_bytes = strtoull(optarg, NULL, 10);
>>> + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
>>> + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN) {
>>> + printf("packet size %" PRIu64 " not valid\n",
>> l2reflect_pkt_bytes);
>>> + return -1;
>>> + }
>>> + if (l2reflect_pkt_bytes >
>> RTE_MBUF_DEFAULT_DATAROOM) {
>>> + printf("NOT IMPLEMENTED. Packet size %"
>> PRIu64 " requires segmented buffers.\n",
>>> + l2reflect_pkt_bytes);
>>> + return -1;
>>> + }
>>> + break;
>>> + case 'c':
>>> + opt_colors = 0;
>>> + break;
>>> + case 'm':
>>> + l2reflect_fake_mac = 1;
>>> + break;
>>> + default:
>>> + l2reflect_usage(prgname);
>>> + return -1;
>>> + }
>>> + }
>>> +
>>> + if (optind >= 0)
>>> + argv[optind - 1] = prgname;
>>> +
>>> + if (hist_filename && !l2reflect_hist) {
>>> + printf("-F switch requires -H switch as well\n");
>>> + return -1;
>>> + }
>>> +
>>> + /* output is redirected, disable coloring */
>>> + if (!isatty(fileno(stdout)))
>>> + opt_colors = 0;
>>> +
>>> + enable_colors(opt_colors);
>>> +
>>> + ret = optind - 1;
>>> + optind = 0; /* reset getopt lib */
>>> + return ret;
>>> +}
>>> +
>>> +/* Send a burst of one packet */
>>> +static inline int
>>> +l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
>>> +{
>>> + unsigned int ret;
>>> + struct rte_ether_hdr *eth;
>>> + struct my_magic_packet *pkt;
>>> + uint16_t type;
>>> +
>>> + eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
>>> + pkt = (struct my_magic_packet *)eth;
>>> + type = pkt->type;
>>> +
>>> + if (likely(type == TRACE_TYPE_DATA))
>>> + clock_gettime(CLOCK_MONOTONIC, &last_sent);
>>> + ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
>>> + if (unlikely(ret < 1))
>>> + rte_pktmbuf_free(*m);
>>> + return 0;
>>> +}
>>> +
>>> +static inline void
>>> +l2reflect_simple_forward(struct rte_mbuf *m)
>>> +{
>>> + struct rte_ether_hdr *eth;
>>> + struct my_magic_packet *pkt;
>>> +
>>> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
>>> + pkt = (struct my_magic_packet *)eth;
>>> +
>>> + /* dst addr */
>>> + rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
>>> + /* src addr */
>>> + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
>>> +
>>> + if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
>>> + rte_eal_trace_generic_str("sending traced packet");
>>> +
>>> + l2reflect_send_packet(&m, l2reflect_port_number);
>>> +}
>>> +
>>> +static struct rte_mbuf *
>>> +l2reflect_new_pkt(unsigned int type)
>>> +{
>>> + struct rte_mbuf *m;
>>> + struct rte_ether_hdr *eth;
>>> + struct my_magic_packet *pkt;
>>> + uint64_t frame_bytes = RTE_ETHER_MIN_LEN;
>>> +
>>> + m = rte_pktmbuf_alloc(l2reflect_pktmbuf_pool);
>>> + if (m == NULL)
>>> + rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc failed\n");
>>> +
>>> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
>>> +
>>> + if (type == TRACE_TYPE_DATA)
>>> + frame_bytes = l2reflect_pkt_bytes;
>>> +
>>> + /* zero out packet to make dumps better readable */
>>> + memset(eth, 0, frame_bytes - RTE_ETHER_CRC_LEN);
>>> +
>>> + if (type == TRACE_TYPE_HELO)
>>> + rte_ether_addr_copy(ðer_bcast_addr, ð->dst_addr);
>>> + else
>>> + rte_ether_addr_copy(&l2reflect_remote_eth_addr, ð-
>>> dst_addr);
>>> +
>>> + /* src addr */
>>> + rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
>>> + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT);
>>> +
>>> + m->data_len = frame_bytes - RTE_ETHER_CRC_LEN;
>>> + m->pkt_len = frame_bytes - RTE_ETHER_CRC_LEN;
>>> +
>>> + pkt = (struct my_magic_packet *)eth;
>>> + pkt->type = type;
>>> + pkt->breakval = l2reflect_break_usec;
>>> + pkt->req_pkt_bytes = l2reflect_pkt_bytes;
>>> +
>>> + return m;
>>> +}
>>> +
>>> +static void
>>> +l2reflect_send_reset(void)
>>> +{
>>> + struct rte_mbuf *m;
>>> + m = l2reflect_new_pkt(TRACE_TYPE_RSET);
>>> + l2reflect_send_packet(&m, l2reflect_port_number);
>>> +}
>>> +
>>> +static void
>>> +l2reflect_send_quit(void)
>>> +{
>>> + struct rte_mbuf *m;
>>> + m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
>>> + l2reflect_send_packet(&m, l2reflect_port_number);
>>> +}
>>> +
>>> +static void
>>> +l2reflect_new_ball(void)
>>> +{
>>> + struct rte_mbuf *pnewball;
>>> + struct rte_ether_hdr *eth;
>>> + struct my_magic_packet *pkt;
>>> + char mac_src_str[RTE_ETHER_ADDR_FMT_SIZE];
>>> + char mac_dst_str[RTE_ETHER_ADDR_FMT_SIZE];
>>> +
>>> + RTE_LOG(INFO, L2REFLECT, "Should create a packet to play with ...\n");
>>> + pnewball = l2reflect_new_pkt(TRACE_TYPE_DATA);
>>> +
>>> + eth = rte_pktmbuf_mtod(pnewball, struct rte_ether_hdr *);
>>> +
>>> + rte_ether_format_addr(mac_src_str, sizeof(mac_src_str),
>> &l2reflect_port_eth_addr);
>>> + rte_ether_format_addr(mac_dst_str, sizeof(mac_dst_str),
>> &l2reflect_remote_eth_addr);
>>> + RTE_LOG(INFO, L2REFLECT, "from MAC address: %s to %s\n\n",
>> mac_src_str, mac_dst_str);
>>> +
>>> + pkt = (struct my_magic_packet *)eth;
>>> +
>>> + /* we are tracing lets tell the others */
>>> + if (l2reflect_break_usec)
>>> + pkt->magic = MAGIC_TRACE_PAYLOAD;
>>> +
>>> + l2reflect_send_packet(&pnewball, l2reflect_port_number);
>>> +}
>>> +
>>> +static inline int64_t
>>> +calcdiff_ns(struct timespec t1, struct timespec t2)
>>> +{
>>> + int64_t diff;
>>> + diff = NSEC_PER_SEC * (int64_t)((int)t1.tv_sec - (int)t2.tv_sec);
>>> + diff += ((int)t1.tv_nsec - (int)t2.tv_nsec);
>>> + return diff;
>>> +}
>>> +
>>> +/* filter the received packets for actual l2reflect messages */
>>> +static inline unsigned int
>>> +l2reflect_rx_filter(
>>> + struct rte_mbuf *buf)
>>> +{
>>> + struct rte_ether_hdr *eth;
>>> + eth = rte_pktmbuf_mtod(buf, struct rte_ether_hdr *);
>>> +
>>> + if (unlikely(buf->nb_segs > 1))
>>> + RTE_LOG(WARNING, L2REFLECT, "Segmented packet: data-len:
>> %i, pkt-len: %i, #seg: %i\n",
>>> + buf->data_len, buf->pkt_len, buf->nb_segs);
>>> +
>>> + /* check for the l2reflect ether type */
>>> + if (unlikely(eth->ether_type !=
>> rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT)))
>>> + return 0;
>>> +
>>> + /*
>>> + * if the packet is not from our partner
>>> + * (and we already have a partner), drop it
>>> + */
>>> + if (unlikely(l2reflect_state != S_ELECT_LEADER &&
>>> + !rte_is_same_ether_addr(ð->src_addr,
>> &l2reflect_remote_eth_addr)))
>>> + return 0;
>>> +
>>> + /* filter bounce-back packets */
>>> + if (unlikely(rte_is_same_ether_addr(ð->src_addr,
>> &l2reflect_port_eth_addr)))
>>> + return 0;
>>> +
>>> + return 1;
>>> +}
>>> +
>>> +/*
>>> + * automatically elect the leader of the benchmark by
>>> + * sending out HELO packets and waiting for responses.
>>> + * On response, the mac addresses are compared and the
>>> + * numerically larger one becomes the leader.
>>> + */
>>> +static int
>>> +elect_leader(uint16_t portid)
>>> +{
>>> + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>>> + struct rte_mbuf *m;
>>> + struct rte_ether_hdr *eth;
>>> + struct rte_ether_addr *src_addr;
>>> + struct rte_eth_dev_info dev_info;
>>> + struct my_magic_packet *pkt;
>>> + unsigned int i, nb_rx;
>>> + int ehlo_send = 0;
>>> + int i_win;
>>> +
>>> + while (l2reflect_state == S_ELECT_LEADER) {
>>> + /* send a packet to make sure the MAC addr of this interface is
>> publicly known */
>>> + m = l2reflect_new_pkt(TRACE_TYPE_HELO);
>>> + RTE_LOG(INFO, L2REFLECT, "looking for player HELO\n");
>>> + l2reflect_send_packet(&m, l2reflect_port_number);
>>> + rte_delay_ms(DELAY_ELECTION_MS);
>>> +
>>> + /* receive election packets */
>>> + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
>>> + MAX_PKT_BURST);
>>> +
>>> + /* note: do not short-circuit as otherwise the mbufs are not
>> freed */
>>> + for (i = 0; i < nb_rx; i++) {
>>> + m = pkts_burst[i];
>>> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
>>> + src_addr = ð->src_addr;
>>> + pkt = (struct my_magic_packet *)eth;
>>> +
>>> + if (!l2reflect_rx_filter(m)) {
>>> + rte_pktmbuf_free(m);
>>> + continue;
>>> + }
>>> +
>>> + if (pkt->type == TRACE_TYPE_EHLO && l2reflect_state
>> == S_ELECT_LEADER) {
>>> + /* check if both modes are equal */
>>> + if (((l2reflect_pkt_bytes <= RTE_ETHER_MTU)
>>> + != (pkt->req_pkt_bytes <=
>> RTE_ETHER_MTU))) {
>>> + l2reflect_state = S_LOCAL_TERM;
>>> + m =
>> l2reflect_new_pkt(TRACE_TYPE_QUIT);
>>> + l2reflect_send_packet(&m,
>> l2reflect_port_number);
>>> + rte_exit(EXIT_FAILURE,
>>> + "remote and local jumbo
>> config does not match "
>>> + "(%" PRIu64 " vs %" PRIu64
>> ")\n",
>>> + l2reflect_pkt_bytes, pkt-
>>> req_pkt_bytes);
>>> + }
>>> + if (l2reflect_pkt_bytes != pkt->req_pkt_bytes) {
>>> + l2reflect_pkt_bytes =
>> MAX(l2reflect_pkt_bytes,
>>> + pkt-
>>> req_pkt_bytes);
>>> +
>> rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
>>> + const uint32_t overhead_len =
>> eth_dev_get_overhead_len(
>>> +
>> dev_info.max_rx_pktlen,
>>> +
>> dev_info.max_mtu);
>>> + const uint16_t mtu =
>> MAX(l2reflect_pkt_bytes - overhead_len,
>>> +
>> dev_info.min_mtu);
>>> + RTE_LOG(INFO, L2REFLECT,
>>> + "update frame sizes: frame: %"
>> PRIu64 ", MTU %d\n",
>>> + l2reflect_pkt_bytes, mtu);
>>> + const int ret = rte_eth_dev_set_mtu(
>>> +
>> l2reflect_port_number,
>>> + mtu);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE, "failed
>> to update MTU: %s\n",
>>> + strerror(-ret));
>>> + }
>>> +
>>> + if (ehlo_send) {
>>> + l2reflect_state = S_RUNNING;
>>> + RTE_LOG(INFO, L2REFLECT, "Enter
>> running state\n");
>>> + }
>>> + }
>>> + /* we got a HELO packet, respond with EHLO */
>>> + if (pkt->type == TRACE_TYPE_HELO) {
>>> + char
>> mac_str_other[RTE_ETHER_ADDR_FMT_SIZE];
>>> + rte_ether_addr_copy(src_addr,
>> &l2reflect_remote_eth_addr);
>>> + m = l2reflect_new_pkt(TRACE_TYPE_EHLO);
>>> + rte_ether_format_addr(
>>> + mac_str_other, sizeof(mac_str_other),
>> &l2reflect_remote_eth_addr);
>>> + RTE_LOG(INFO, L2REFLECT, "found one HELO
>> from %s\n", mac_str_other);
>>> + l2reflect_send_packet(&m,
>> l2reflect_port_number);
>>> + ehlo_send = 1;
>>> + }
>>> + rte_pktmbuf_free(m);
>>
>> The loop content could be moved in a dedicated function, it would
>> improve the readability.
>>
>>> + }
>>> + }
>>> +
>>> + if (rte_is_same_ether_addr(&l2reflect_port_eth_addr,
>> &l2reflect_remote_eth_addr))
>>> + rte_exit(EXIT_FAILURE, "talking to myself ... confused\n");
>>> +
>>> + /* the one with the bigger MAC is the leader */
>>> + i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) & MAC_ADDR_CMP)
>>>
>>> + (*((uint64_t *)&l2reflect_remote_eth_addr) &
>> MAC_ADDR_CMP));
>>> +
>>> + RTE_LOG(INFO, L2REFLECT, "i am the \"%s\"\n", i_win ? "rick" :
>> "morty");
>>> +
>>> + return i_win;
>>> +}
>>> +
>>> +/*
>>> + * add the measured time diff to the statistics.
>>> + * return false if threshold is hit
>>> + */
>>> +static inline int
>>> +add_to_record(const uint64_t diff)
>>> +{
>>> + record.rounds++;
>>> + /* do not count the first rounds, diff would be too high */
>>> + if (record.rounds < WARMUP_ROUNDS)
>>> + return 1;
>>> +
>>> + if (l2reflect_hist) {
>>> + const uint64_t bucket =
>>> + MIN(diff / (hist_bucket_usec * 1000),
>> l2reflect_hist_buckets-1);
>>> + record.hist[bucket]++;
>>> + }
>>> +
>>> + record.avg_round_ns += (double)diff;
>>> + if (diff < record.min_round_ns)
>>> + record.min_round_ns = diff;
>>> + if (diff > record.max_round_ns) {
>>> + record.max_round_ns = diff;
>>> + if (l2reflect_break_usec &&
>>> + (record.max_round_ns > (l2reflect_break_usec * 1000)))
>>> + return 0;
>>> + }
>>> + return 1;
>>> +}
>>> +
>>> +/*
>>> + * process a single packet.
>>> + * return false if latency threshold is hit
>>> + */
>>> +static inline int
>>> +process_packet(
>>> + struct my_magic_packet *pkt,
>>> + struct timespec *rx_time,
>>> + uint64_t *diff)
>>> +{
>>> + if (pkt->type == TRACE_TYPE_DATA) {
>>> + rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
>>> + *diff = calcdiff_ns(last_recv, last_sent);
>>> + if (!unlikely(add_to_record(*diff))) {
>>> + /* TODO: improve tracing */
>>> + rte_eal_trace_generic_u64(record.max_round_ns /
>> 1000);
>>> + return 0;
>>> + }
>>> + }
>>> + if (pkt->magic == MAGIC_TRACE_PAYLOAD)
>>> + rte_eal_trace_generic_str("received traced packet");
>>> +
>>> + return 1;
>>> +}
>>> +
>>> +/*
>>> + * free all packet buffers in the range [begin, end[.
>>> + */
>>> +static void
>>> +free_pktbufs(
>>> + struct rte_mbuf **bufs,
>>> + int begin,
>>> + int end)
>>> +{
>>> + int i = begin;
>>> + for (; i < end; i++)
>>> + rte_pktmbuf_free(bufs[0]);
>>
>> rte_pktmbuf_free(bufs[i]) ?
>>
>>> +}
>>> +
>>> +/*
>>> + * return 1 in case the ball was lost (cheap check)
>>> + */
>>> +static inline void
>>> +check_ball_lost(const uint64_t dp_idle) {
>>> + /* only check if we are in running state and have a breakval */
>>> + if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
>>> + l2reflect_state == S_RUNNING &&
>>> + l2reflect_break_usec &&
>>> + record.rounds > WARMUP_ROUNDS) {
>>> + RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64 "
>> rounds\n", record.rounds);
>>> + l2reflect_state = S_LOCAL_TERM;
>>> + }
>>> +}
>>> +
>>> +/* main processing loop */
>>> +static void
>>> +l2reflect_main_loop(void)
>>> +{
>>> + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
>>> + struct rte_mbuf *m;
>>> + unsigned int lcore_id;
>>> + unsigned int j, nb_rx, nb_evt;
>>> + uint16_t portid;
>>> + /* number of consequent idle passes */
>>> + uint64_t dp_idle = 0;
>>> + uint64_t diff = 0;
>>> + int sender;
>>> + struct my_magic_packet *pkt;
>>> + struct rte_ether_hdr *eth;
>>> + struct rte_epoll_event event;
>>> +
>>> + lcore_id = rte_lcore_id();
>>> +
>>> + RTE_LOG(INFO, L2REFLECT, "entering main loop on lcore %u\n",
>> lcore_id);
>>> +
>>> + portid = l2reflect_port_number;
>>> + RTE_LOG(INFO, L2REFLECT, " -- lcoreid=%u portid=%u\n", lcore_id,
>>> + portid);
>>> + assert_link_status(portid);
>>> +
>>> +restart:
>>> + init_record();
>>> + rte_eth_stats_reset(portid);
>>> + l2reflect_state = S_ELECT_LEADER;
>>> + sender = elect_leader(portid);
>>> +
>>> + if (l2reflect_break_usec)
>>> + rte_eal_trace_generic_str("hit breakval");
>>> +
>>> + /* the leader election implements a latch (half-barrier).
>>> + * To ensure that the other party is in running state, we
>>> + * have to wait at least a full election period
>>> + */
>>> + rte_delay_ms(DELAY_ELECTION_MS * 2);
>>> +
>>> + /* we are the sender so we bring one ball into the game */
>>> + if (sender)
>>> + l2reflect_new_ball();
>>> +
>>> + /* reset the record */
>>> + init_record();
>>> + while (l2reflect_state == S_RUNNING) {
>>> + struct timespec rx_time;
>>> +
>>> + if (l2reflect_interrupt) {
>>> + rte_eth_dev_rx_intr_enable(portid, l2reflect_q);
>>> + /* wait for interrupt or timeout */
>>> + nb_evt = rte_epoll_wait(RTE_EPOLL_PER_THREAD,
>> &event, 1,
>>> + l2reflect_sleep_msec);
>>> + rte_eth_dev_rx_intr_disable(portid, l2reflect_q);
>>> + if (nb_evt == 0 && rounds > WARMUP_ROUNDS)
>>> + ++record.timeouts;
>>> + }
>>> +
>>> + nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
>>> + MAX_PKT_BURST);
>>> +
>>> + if (nb_rx) {
>>> + clock_gettime(CLOCK_MONOTONIC, &rx_time);
>>> + dp_idle = 0;
>>> + } else
>>> + ++dp_idle;
>>> +
>>> + for (j = 0; j < nb_rx; j++) {
>>> + m = pkts_burst[j];
>>> + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
>>> + pkt = (struct my_magic_packet *)eth;
>>> +
>>> + rte_prefetch0(eth);
>>> +
>>> + if (unlikely(!l2reflect_rx_filter(m))) {
>>> + rte_pktmbuf_free(m);
>>> + continue;
>>> + }
>>> +
>>> + /* remote is telling us to reset or stop */
>>> + if (unlikely(pkt->type == TRACE_TYPE_RSET)) {
>>> + free_pktbufs(pkts_burst, j, nb_rx);
>>> + goto restart;
>>> + }
>>> + if (unlikely(pkt->type == TRACE_TYPE_QUIT)) {
>>> + l2reflect_state = S_REMOTE_TERM;
>>> + free_pktbufs(pkts_burst, j, nb_rx);
>>> + break;
>>> + }
>>> +
>>> + if (likely(l2reflect_state == S_RUNNING)) {
>>> + if (unlikely(!process_packet(pkt, &rx_time,
>> &diff))) {
>>> + l2reflect_state = S_LOCAL_TERM;
>>> + free_pktbufs(pkts_burst, j, nb_rx);
>>> + break;
>>> + }
>>> + l2reflect_simple_forward(m);
>>> + }
>>> + }
>>> + check_ball_lost(dp_idle);
>>> + }
>>> +
>>> + const int state_cpy = l2reflect_state;
>>> + switch (state_cpy) {
>>> + case S_RESET_TRX:
>>> + l2reflect_send_reset();
>>> + l2reflect_state = S_ELECT_LEADER;
>>> + /* fallthrough */
>>> + case S_ELECT_LEADER:
>>> + goto restart;
>>> + }
>>> +
>>> + if (state_cpy == S_LOCAL_TERM) {
>>> + rte_eal_trace_generic_str("local termination");
>>> + l2reflect_send_quit();
>>> + } else if (state_cpy == S_REMOTE_TERM) {
>>> + RTE_LOG(INFO, L2REFLECT, "received message that remote hit
>> threshold (or is cancelled)\n");
>>> + }
>>> +}
>>> +
>>> +static int
>>> +l2reflect_launch_one_lcore(__rte_unused void *dummy)
>>> +{
>>> + struct sched_param param;
>>> + int err;
>>> +
>>> + if (sleep_start) {
>>> + RTE_LOG(INFO, L2REFLECT, "Sleeping and waiting for
>> SIGCONT\n");
>>> + while (sleep_start) {
>>> + rte_delay_ms(10);
>>> + if (l2reflect_state == S_LOCAL_TERM)
>>> + rte_exit(EXIT_SUCCESS, "Quit\n");
>>> + }
>>> + RTE_LOG(INFO, L2REFLECT, "Got SIGCONT, continuing");
>>> + }
>>> + if (l2reflect_mlock) {
>>> + err = mlockall(MCL_CURRENT | MCL_FUTURE);
>>> + if (err)
>>> + rte_exit(EXIT_FAILURE, "mlockall failed: %s\n",
>>> + strerror(errno));
>>> + }
>>> + if (priority > 0 || policy != SCHED_OTHER) {
>>> + memset(¶m, 0, sizeof(param));
>>> + param.sched_priority = priority;
>>> + err = sched_setscheduler(0, policy, ¶m);
>>> + if (err)
>>> + rte_exit(EXIT_FAILURE,
>>> + "sched_setscheduler failed: %s\n",
>>> + strerror(errno));
>>> + }
>>> + if (l2reflect_interrupt) {
>>> + err = rte_eth_dev_rx_intr_ctl_q(l2reflect_port_number,
>>> + l2reflect_q,
>>> + RTE_EPOLL_PER_THREAD,
>>> + RTE_INTR_EVENT_ADD,
>> NULL);
>>> + if (err)
>>> + rte_exit(EXIT_FAILURE,
>>> + "could not register I/O interrupt\n");
>>> + }
>>> + l2reflect_main_loop();
>>> + return 0;
>>> +}
>>> +
>>> +static void
>>> +sig_handler(int signum)
>>> +{
>>> + switch (signum) {
>>> + case SIGUSR1:
>>> + if (l2reflect_state == S_RUNNING)
>>> + l2reflect_state = S_RESET_TRX;
>>> + break;
>>> + case SIGUSR2:
>>> + l2reflect_output_hist = 1;
>>> + break;
>>> + case SIGCONT:
>>> + sleep_start = 0;
>>> + break;
>>> + case SIGHUP:
>>> + case SIGINT:
>>> + l2reflect_state = S_LOCAL_TERM;
>>> + break;
>>> + }
>>> +}
>>> +
>>> +int
>>> +main(int argc, char **argv)
>>> +{
>>> + struct rte_eth_dev_info dev_info;
>>> + struct rte_eth_txconf txconf;
>>> + int ret;
>>> + uint32_t i;
>>> + uint16_t nb_ports;
>>> + unsigned int lcore_id;
>>> + struct sigaction action;
>>> + bzero(&action, sizeof(action));
>>> + char mempool_name[128];
>>> + char mac_str[RTE_ETHER_ADDR_FMT_SIZE];
>>> +
>>> + action.sa_handler = sig_handler;
>>> + if (sigaction(SIGHUP, &action, NULL) < 0 ||
>>> + sigaction(SIGUSR1, &action, NULL) < 0 ||
>>> + sigaction(SIGUSR2, &action, NULL) < 0 ||
>>> + sigaction(SIGCONT, &action, NULL) < 0 ||
>>> + sigaction(SIGINT, &action, NULL) < 0) {
>>> + rte_exit(EXIT_FAILURE, "Could not register signal handler\n");
>>> + }
>>> +
>>> + lcore_id = rte_lcore_id();
>>> +
>>> + if (check_opts_for_help(argc, argv, l2reflect_usage))
>>> + return 0;
>>> +
>>> + /* init EAL */
>>> + ret = rte_eal_init(argc, argv);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
>>> + argc -= ret;
>>> + argv += ret;
>>> +
>>> + /* parse application arguments (after the EAL ones) */
>>> + ret = l2reflect_parse_args(argc, argv);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE, "Invalid L2REFLECT arguments\n");
>>> +
>>> + snprintf(mempool_name, sizeof(mempool_name), "mbuf_pool_%d",
>> getpid());
>>> + RTE_LOG(DEBUG, L2REFLECT, "About to create mempool \"%s\"\n",
>> mempool_name);
>>> + /* create the mbuf pool */
>>> + l2reflect_pktmbuf_pool =
>>> + rte_pktmbuf_pool_create(mempool_name, NB_MBUF,
>>> + MAX_PKT_BURST, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
>>> + rte_socket_id());
>>> +
>>> + if (l2reflect_pktmbuf_pool == NULL)
>>> + rte_exit(EXIT_FAILURE,
>>> + "Cannot init/find mbuf pool name %s\nError: %d
>> %s\n",
>>> + mempool_name, rte_errno, rte_strerror(rte_errno));
>>> +
>>> + nb_ports = rte_eth_dev_count_avail();
>>> + if (nb_ports == 0)
>>> + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
>>> + if (l2reflect_port_number + 1 > nb_ports)
>>> + rte_exit(EXIT_FAILURE, "Chosen port %d does not exist -
>> bye\n",
>>> + l2reflect_port_number);
>>> + RTE_LOG(INFO, L2REFLECT, "We have %d ports and will use port %d\n",
>> nb_ports,
>>> + l2reflect_port_number);
>>> +
>>> + rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
>>> + RTE_LOG(INFO, L2REFLECT, "Initializing port %u ...\n",
>> l2reflect_port_number);
>>> + fflush(stdout);
>>> +
>>> + if (l2reflect_interrupt)
>>> + port_conf.intr_conf.rxq = 1;
>>> +
>>> + ret = config_port_max_pkt_len(&port_conf, &dev_info);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE,
>>> + "Invalid max packet length: %u (port %u)\n",
>>> + l2reflect_port_number, l2reflect_port_number);
>>> +
>>> + ret = rte_eth_dev_configure(l2reflect_port_number, 1, 1, &port_conf);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE,
>>> + "Cannot configure device: err=%s, port=%u\n",
>>> + strerror(-ret), l2reflect_port_number);
>>> +
>>> + ret = rte_eth_dev_adjust_nb_rx_tx_desc(l2reflect_port_number,
>> &nb_rxd, &nb_txd);
>>> + if (ret != 0)
>>> + rte_exit(EXIT_FAILURE,
>>> + "Cannot adjust # of Rx/Tx descriptors to HW limits:
>> err=%s, port=%u\n",
>>> + strerror(-ret), l2reflect_port_number);
>>> +
>>> + /* init RX queues */
>>> + for (i = 0; i <= l2reflect_q; i++) {
>>> + ret = rte_eth_rx_queue_setup(
>>> + l2reflect_port_number, i, nb_rxd,
>>> + rte_eth_dev_socket_id(l2reflect_port_number), NULL,
>>> + l2reflect_pktmbuf_pool);
>>> + if (ret < 0)
>>> + rte_exit(
>>> + EXIT_FAILURE,
>>> + "rte_eth_rx_queue_setup:err=%s, port=%u
>> q=%u\n",
>>> + strerror(-ret), l2reflect_port_number, i);
>>> + }
>>> +
>>> + /* init one TX queue on each port */
>>> + txconf = dev_info.default_txconf;
>>> + txconf.offloads = port_conf.txmode.offloads;
>>> + ret = rte_eth_tx_queue_setup(
>>> + l2reflect_port_number, 0, nb_txd,
>>> + rte_eth_dev_socket_id(l2reflect_port_number), &txconf);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE,
>>> + "rte_eth_tx_queue_setup:err=%s, port=%u\n",
>>> + strerror(-ret), (unsigned int)l2reflect_port_number);
>>> +
>>> + /* Start device */
>>> + ret = rte_eth_dev_start(l2reflect_port_number);
>>> + if (ret < 0)
>>> + rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%s, port=%u\n",
>>> + strerror(-ret), (unsigned int)l2reflect_port_number);
>>> +
>>> + rte_eth_macaddr_get(l2reflect_port_number,
>> &l2reflect_port_eth_addr);
>>> +
>>> + /*
>>> + * When running on a Tap device, we might want to use a foreign
>>> + * mac address to make sure that the application and the Tap device
>>> + * do not share the same MAC addr. By that, we enforce that the
>>> + * bridge learns this address and correctly forwards unicast packets.
>>> + */
>>> + if (l2reflect_fake_mac)
>>> + l2reflect_port_eth_addr.addr_bytes[5] += 1;
>>> +
>>> + rte_ether_format_addr(mac_str, sizeof(mac_str),
>>> + &l2reflect_port_eth_addr);
>>> + RTE_LOG(INFO, L2REFLECT, "Port %u, MAC address: %s\n\n",
>>> + (unsigned int)l2reflect_port_number, mac_str);
>>> +
>>> + /*
>>> + * in quiet mode the primary executes the main packet loop
>>> + * otherwise the one worker does it and the primary prints stats
>>> + */
>>> + if (quiet) {
>>> + assert(rte_lcore_count() == 1);
>>> +#ifdef HAS_SYS_IO
>>> + if (disable_int) {
>>> + iopl(3);
>>> + asm("cli");
>>> + }
>>> +#endif
>>> + RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n", getpid(),
>> getppid());
>>> + l2reflect_launch_one_lcore(NULL);
>>> + } else {
>>> + assert(rte_lcore_count() == 2);
>>> + /* the worker reflects the packets */
>>> + RTE_LCORE_FOREACH_WORKER(lcore_id)
>>> + {
>>> + rte_eal_remote_launch(l2reflect_launch_one_lcore,
>> NULL,
>>> + lcore_id);
>>> + }
>>> +
>>> + /* the primary prints the stats */
>>> + init_record();
>>> + l2reflect_stats_loop();
>>> + rte_eal_mp_wait_lcore();
>>> + }
>>> + rte_eal_cleanup();
>>> +
>>> + if (l2reflect_hist)
>>> + output_histogram_snapshot();
>>> +
>>> + cleanup_record();
>>> +
>>> + return 0;
>>> +}
>>> diff --git a/app/l2reflect/meson.build b/app/l2reflect/meson.build
>>> new file mode 100644
>>> index 0000000000..14b154ef06
>>> --- /dev/null
>>> +++ b/app/l2reflect/meson.build
>>> @@ -0,0 +1,21 @@
>>> +# SPDX-License-Identifier: BSD-3-Clause
>>> +# Copyright(c) 2022 Siemens AG
>>> +
>>> +cc = meson.get_compiler('c')
>>> +
>>> +jansson = dependency('libjansson', required: false)
>>> +
>>> +if not jansson.found()
>>> + jansson = cc.find_library('jansson', required: false)
>>> +endif
>>> +
>>> +if cc.has_header('sys/io.h')
>>> + cflags += '-DHAS_SYS_IO'
>>> +endif
>>> +
>>> +sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
>>> +deps += ['ethdev']
>>
>> I think some dependecies are missing:
>> ring, mbuf, mempool
>>
>>> +if jansson.found()
>>> + ext_deps += jansson
>>> + cflags += '-DRTE_HAS_JANSSON'
>>> +endif
>>> diff --git a/app/l2reflect/payload.h b/app/l2reflect/payload.h
>>> new file mode 100644
>>> index 0000000000..c1fae5d5e4
>>> --- /dev/null
>>> +++ b/app/l2reflect/payload.h
>>> @@ -0,0 +1,26 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2022 Siemens AG
>>> + */
>>> +#include <rte_ether.h>
>>> +
>>> +#ifndef _L2REFLECT_PAYLOAD_H_
>>> +#define _L2REFLECT_PAYLOAD_H_
>>> +
>>> +#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
>>> +/* IEEE Std 802 - Local Experimental Ethertype */
>>> +#define ETHER_TYPE_L2REFLECT 0x88B5
>>> +
>>> +struct my_magic_packet {
>>> + /* l2 packet header */
>>> + struct rte_ether_hdr eth;
>>> + /* type of the l2reflect packet */
>>> + uint8_t type;
>>> + /* magic easy-to-spot pattern for tracing */
>>> + uint64_t magic;
>>> + /* break if latency is larger than this */
>>> + uint64_t breakval;
>>> + /* intended size of the packet */
>>> + uint64_t req_pkt_bytes;
>>> +};
>>> +
>>> +#endif /* _L2REFLECT_PAYLOAD_H_ */
>>> diff --git a/app/l2reflect/stats.c b/app/l2reflect/stats.c
>>> new file mode 100644
>>> index 0000000000..6bcbb7a2bf
>>> --- /dev/null
>>> +++ b/app/l2reflect/stats.c
>>> @@ -0,0 +1,225 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + */
>>> +#include <stdio.h>
>>> +#include <sys/types.h>
>>> +#include <unistd.h>
>>> +#include <time.h>
>>> +#include <string.h>
>>> +#ifdef RTE_HAS_JANSSON
>>> +#include <jansson.h>
>>> +#endif
>>> +#include "colors.h"
>>> +#include "stats.h"
>>> +
>>> +unsigned int hist_bucket_usec;
>>> +struct stats record;
>>> +char *hist_filename;
>>> +
>>> +void
>>> +init_record(void)
>>> +{
>>> + record.max_round_ns = 0;
>>> + record.min_round_ns = MIN_INITIAL;
>>> + record.rounds = 0;
>>> + record.timeouts = 0;
>>> + record.avg_round_ns = 0;
>>> + if (l2reflect_hist) {
>>> + if (!record.hist_size) {
>>> + record.hist =
>>> + calloc(l2reflect_hist_buckets, sizeof(uint64_t));
>>> + record.hist_size = l2reflect_hist_buckets;
>>> + } else {
>>> + memset(record.hist, 0,
>>> + record.hist_size * sizeof(uint64_t));
>>> + }
>>> + }
>>> + clock_gettime(CLOCK_MONOTONIC, &record.time_start);
>>> +}
>>> +
>>> +void
>>> +cleanup_record(void)
>>> +{
>>> + if (l2reflect_hist) {
>>> + free(record.hist);
>>> + record.hist = NULL;
>>> + record.hist_size = 0;
>>> + }
>>> +}
>>> +
>>> +void
>>> +output_histogram_snapshot(void)
>>> +{
>>> + char *json = serialize_histogram(&record);
>>> + FILE *fd = stderr;
>>> + if (hist_filename)
>>> + fd = fopen(hist_filename, "w");
>>> + fputs(json, fd);
>>> + fputs("\n", fd);
>>> + free(json);
>>> + if (hist_filename)
>>> + fclose(fd);
>>> +}
>>> +
>>> +void
>>> +print_stats(void)
>>> +{
>>> + const char clr[] = { 27, '[', '2', 'J', '\0' };
>>> + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
>>> + const uint64_t bytes_in_gib = 0x40000000;
>>> + struct rte_eth_stats stats;
>>> + char mac_str_me[RTE_ETHER_ADDR_FMT_SIZE];
>>> + char mac_str_remote[RTE_ETHER_ADDR_FMT_SIZE];
>>> + char timeout_bound_str[32];
>>> + const char *str_jumbo = l2reflect_pkt_bytes > RTE_ETHER_MTU ?
>>> + "jumbo" : "no-jumbo";
>>> + struct timespec time_now;
>>> +
>>> + if (l2reflect_interrupt == 0)
>>> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), "
>> polling");
>>> + else if (l2reflect_sleep_msec == -1u)
>>> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">=
>> infinite");
>>> + else
>>> + snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">%9"
>> PRIu64 "ms",
>>> + l2reflect_sleep_msec);
>>> +
>>> + rte_eth_stats_get(l2reflect_port_number, &stats);
>>> + rte_ether_format_addr(mac_str_me, sizeof(mac_str_me),
>>> + &l2reflect_port_eth_addr);
>>> + rte_ether_format_addr(mac_str_remote, sizeof(mac_str_remote),
>>> + &l2reflect_remote_eth_addr);
>>> +
>>> + clock_gettime(CLOCK_MONOTONIC, &time_now);
>>> + const uint64_t time_since_start = time_now.tv_sec -
>> record.time_start.tv_sec;
>>> +
>>> + /* Clear screen and move to top left */
>>> + printf("%s%s", clr, topLeft);
>>> +
>>> + printf("%sNetworking Roundtrip Test%s\n", colors->green, colors-
>>> reset);
>>> + printf("\n%sPort statistics
>> ====================================%s",
>>> + colors->magenta, colors->reset);
>>> +
>>> + printf("\nMe: %s <--> Remote: %s", mac_str_me, mac_str_remote);
>>> + printf("\nStatistics for port %d PID %d on lcore %d ---------"
>>> + "\nState: %-16s %10" PRIu64 " s"
>>> + "\nPackets tx: %22" PRIu64 "\nPackets rx: %22" PRIu64
>>> + "\nBytes tx: %24" PRIu64 " (%8.2f GiB)"
>>> + "\nBytes rx: %24" PRIu64 " (%8.2f GiB)"
>>> + "\nErrors tx: %23" PRIu64 "\nErrors rx: %23" PRIu64
>>> + "\nTimeouts rx: %21" PRIu64 " (%s)"
>>> + "\nPacketsize [Byte]: %15" PRIu64 " (%12s)",
>>> + l2reflect_port_number, getpid(), rte_lcore_id(),
>>> + runstate_tostring(l2reflect_state),
>>> + time_since_start,
>>> + stats.opackets, stats.ipackets, stats.obytes,
>>> + (double)stats.obytes / bytes_in_gib, stats.ibytes,
>>> + (double)stats.ibytes / bytes_in_gib, stats.oerrors,
>>> + stats.ierrors, record.timeouts,
>>> + timeout_bound_str, l2reflect_pkt_bytes, str_jumbo);
>>> + printf("\n%sPort timing statistics
>> =============================%s",
>>> + colors->magenta, colors->reset);
>>> + if (l2reflect_state == S_ELECT_LEADER ||
>>> + record.min_round_ns == MIN_INITIAL) {
>>> + printf("\n\nBenchmark not started yet...\n");
>>> + } else {
>>> + printf("\n%sMax%s roundtrip: %19" PRIu64 " us", colors->red,
>>> + colors->reset, record.max_round_ns / 1000);
>>> + printf("\n%sAvg%s roundtrip: %19" PRIu64 " us", colors-
>>> yellow,
>>> + colors->reset,
>>> + record.rounds ? (uint64_t)(record.avg_round_ns /
>>> + record.rounds / 1000) :
>>> + 0);
>>> + printf("\n%sMin%s roundtrip: %19" PRIu64 " us", colors->green,
>>> + colors->reset, record.min_round_ns / 1000);
>>> + }
>>> +
>> printf("\n%s================================================
>> ====%s\n",
>>> + colors->magenta, colors->reset);
>>> +}
>>> +
>>> +void
>>> +l2reflect_stats_loop(void)
>>> +{
>>> + while (!(l2reflect_state & (S_LOCAL_TERM | S_REMOTE_TERM))) {
>>> + print_stats();
>>> + if (l2reflect_hist && l2reflect_output_hist) {
>>> + output_histogram_snapshot();
>>> + l2reflect_output_hist = 0;
>>> + }
>>> + rte_delay_us_sleep(1000000);
>>> + }
>>> +}
>>> +
>>> +char *
>>> +serialize_histogram(__rte_unused const struct stats *record)
>>> +{
>>> +#ifndef RTE_HAS_JANSSON
>>> + return strdup("to print histogram, build with jansson support");
>>> +#else
>>> + char *str = NULL;
>>> + char key[8];
>>> + unsigned int i;
>>> + json_t *hist0, *cpu0, *all_cpus, *output;
>>> +
>>> + output = json_object();
>>> + /* version: 1 */
>>> + json_object_set_new(output, "version", json_integer(1));
>>> +
>>> + /* cpu 0 histogram */
>>> + hist0 = json_object();
>>> + for (i = 0; i < record->hist_size; ++i) {
>>> + /* only log positive numbers to meet jitterplot format */
>>> + if (record->hist[i] != 0) {
>>> + snprintf(key, 8, "%u", i * hist_bucket_usec);
>>> + json_object_set(hist0, key,
>>> + json_integer(record->hist[i]));
>>> + }
>>> + }
>>> +
>>> + /* in case of empty histogram, set these values to zero */
>>> + const json_int_t min_round_us =
>>> + record->rounds ? record->min_round_ns / 1000 : 0;
>>> + const json_int_t avg_round_us =
>>> + record->rounds ? record->avg_round_ns / record->rounds /
>> 1000 : 0;
>>> + const json_int_t max_round_us =
>>> + record->rounds ? record->max_round_ns / 1000 : 0;
>>> +
>>> + /* cpu 0 stats */
>>> + cpu0 = json_object();
>>> + json_object_set_new(cpu0, "histogram", hist0);
>>> + json_object_set_new(cpu0, "count", json_integer(record->rounds));
>>> + json_object_set_new(cpu0, "min", json_integer(min_round_us));
>>> + json_object_set_new(cpu0, "max", json_integer(max_round_us));
>>> + json_object_set_new(cpu0, "avg", json_integer(avg_round_us));
>>> +
>>> + /* combine objects */
>>> + all_cpus = json_object();
>>> + json_object_set_new(all_cpus, "0", cpu0);
>>> + json_object_set_new(output, "cpu", all_cpus);
>>> +
>>> + str = json_dumps(output, JSON_ENSURE_ASCII | JSON_INDENT(2));
>>> +
>>> + /* cleanup */
>>> + json_decref(output);
>>> +
>>> + return str;
>>> +#endif
>>> +}
>>> +
>>> +const char *
>>> +runstate_tostring(int s)
>>> +{
>>> + switch (s) {
>>> + case S_ELECT_LEADER:
>>> + return "Electing";
>>> + case S_RESET_TRX:
>>> + return "Resetting";
>>> + case S_RUNNING:
>>> + return "Running";
>>> + case S_LOCAL_TERM:
>>> + return "Term. local";
>>> + case S_REMOTE_TERM:
>>> + return "Term. remote";
>>> + default:
>>> + return "Preparing";
>>> + }
>>> +}
>>> diff --git a/app/l2reflect/stats.h b/app/l2reflect/stats.h
>>> new file mode 100644
>>> index 0000000000..7f3dd9fffb
>>> --- /dev/null
>>> +++ b/app/l2reflect/stats.h
>>> @@ -0,0 +1,67 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Siemens AG
>>> + */
>>> +#ifndef _L2REFLECT_STATS_H_
>>> +#define _L2REFLECT_STATS_H_
>>> +#include <stdint.h>
>>> +#include <stdatomic.h>
>>> +#include <limits.h>
>>> +
>>> +#include <rte_ethdev.h>
>>> +
>>> +#include "l2reflect.h"
>>> +
>>> +#define MIN_INITIAL ULONG_MAX
>>> +#define HIST_NUM_BUCKETS_DEFAULT 128
>>> +
>>> +/* runtime statistics */
>>> +struct stats {
>>> + uint64_t max_round_ns;
>>> + uint64_t min_round_ns;
>>> + uint64_t rounds;
>>> + uint64_t timeouts;
>>> + double avg_round_ns;
>>> + unsigned int hist_size;
>>> + /* each slot is 10us */
>>> + uint64_t *hist;
>>> + struct timespec time_start;
>>> +};
>>> +
>>> +/* size of each histogram bucket in usec */
>>> +extern unsigned int hist_bucket_usec;
>>> +extern struct stats record;
>>> +extern char *hist_filename;
>>> +
>>> +void
>>> +init_record(void);
>>> +void
>>> +cleanup_record(void);
>>> +
>>> +void
>>> +l2reflect_stats_loop(void);
>>> +
>>> +/*
>>> + * Write the histogram to file / stdio without any locking.
>>> + * When called during the measurement, values are approximations
>>> + * (racy reads).
>>> + */
>>> +void
>>> +output_histogram_snapshot(void);
>>> +
>>> +/* Print out statistics on packets dropped */
>>> +void
>>> +print_stats(void);
>>> +
>>> +/*
>>> + * get a JSON representation of the record
>>> + */
>>> +char *
>>> +serialize_histogram(const struct stats *record);
>>> +
>>> +/*
>>> + * get a string representation of the current runstate
>>> + */
>>> +const char *
>>> +runstate_tostring(int s);
>>> +
>>> +#endif /* _L2REFLECT_STATS_H_ */
>>> diff --git a/app/l2reflect/utils.c b/app/l2reflect/utils.c
>>> new file mode 100644
>>> index 0000000000..4116b986d2
>>> --- /dev/null
>>> +++ b/app/l2reflect/utils.c
>>> @@ -0,0 +1,67 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2021 Siemens AG
>>> + */
>>> +
>>> +#include <rte_ethdev.h>
>>> +#include <rte_log.h>
>>> +#include <rte_ethdev.h>
>>> +
>>> +#include "utils.h"
>>> +#include "l2reflect.h"
>>> +
>>> +void
>>> +assert_link_status(int port_id)
>>> +{
>>> + struct rte_eth_link link;
>>> + uint8_t rep_cnt = MAX_REPEAT_TIMES;
>>> + int link_get_err = -EINVAL;
>>> +
>>> + memset(&link, 0, sizeof(link));
>>> + do {
>>> + link_get_err = rte_eth_link_get_nowait(port_id, &link);
>>> + if (link_get_err == 0 && link.link_status == RTE_ETH_LINK_UP)
>>> + break;
>>> + rte_delay_ms(CHECK_INTERVAL);
>>> + RTE_LOG(INFO, L2REFLECT, "Link not ready yet, try again...\n");
>>> + } while (--rep_cnt && (l2reflect_state != S_LOCAL_TERM));
>>> +
>>> + if (link_get_err < 0)
>>> + rte_exit(EXIT_FAILURE, "error: link get is failing: %s\n",
>>> + rte_strerror(-link_get_err));
>>> + if (link.link_status == RTE_ETH_LINK_DOWN)
>>> + rte_exit(EXIT_FAILURE, "error: link is still down\n");
>>> +
>>> + const char *linkspeed_str = rte_eth_link_speed_to_str(link.link_speed);
>>> + RTE_LOG(INFO, L2REFLECT,
>>> + "Link status on port %d: speed: %s, duplex: %s\n",
>>> + port_id, linkspeed_str,
>>> + link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX ? "full" :
>> "half");
>>> +}
>>> +
>>> +uint32_t
>>> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
>>> +{
>>> + uint32_t overhead_len;
>>> + if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
>>> + overhead_len = max_rx_pktlen - max_mtu;
>>> + else
>>> + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
>>> + return overhead_len;
>>> +}
>>> +
>>> +int
>>> +config_port_max_pkt_len(struct rte_eth_conf *conf,
>>> + struct rte_eth_dev_info
>> *dev_info)
>>> +{
>>> + uint32_t overhead_len;
>>> + if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
>>> + l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN)
>>> + return -1;
>>> + overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
>>> + dev_info->max_mtu);
>>> + conf->rxmode.mtu = MAX(l2reflect_pkt_bytes - overhead_len,
>>> + dev_info->min_mtu);
>>> + if (conf->rxmode.mtu > RTE_ETHER_MTU)
>>> + conf->txmode.offloads |=
>> RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
>>> + return 0;
>>> +}
>>> diff --git a/app/l2reflect/utils.h b/app/l2reflect/utils.h
>>> new file mode 100644
>>> index 0000000000..177ad8cda6
>>> --- /dev/null
>>> +++ b/app/l2reflect/utils.h
>>> @@ -0,0 +1,20 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2021 Siemens AG
>>> + */
>>> +
>>> +#ifndef _L2REFLECT_UTILS_H_
>>> +#define _L2REFLECT_UTILS_H_
>>> +
>>> +#define MAX_REPEAT_TIMES 30
>>> +#define CHECK_INTERVAL 2000
>>> +
>>> +void assert_link_status(int port_id);
>>> +
>>> +uint32_t
>>> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu);
>>> +
>>> +int
>>> +config_port_max_pkt_len(struct rte_eth_conf *conf,
>>> + struct rte_eth_dev_info *dev_info);
>>> +
>>> +#endif /* _L2REFLECT_UTILS_H_ */
>>> diff --git a/app/meson.build b/app/meson.build
>>> index 0ea04cadeb..3593322ab2 100644
>>> --- a/app/meson.build
>>> +++ b/app/meson.build
>>> @@ -3,6 +3,7 @@
>>>
>>> apps = [
>>> 'dumpcap',
>>> + 'l2reflect',
>>> 'pdump',
>>> 'proc-info',
>>> 'test-acl',
>
new file mode 100644
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+
+#include "colors.h"
+
+const struct color_palette *colors;
+
+static const struct color_palette color_palette_default = {
+ .red = "\x1b[01;31m",
+ .green = "\x1b[01;32m",
+ .yellow = "\x1b[01;33m",
+ .blue = "\x1b[01;34m",
+ .magenta = "\x1b[01;35m",
+ .cyan = "\x1b[01;36m",
+ .reset = "\x1b[0m"
+};
+
+static const struct color_palette color_palette_bw = { .red = "",
+ .green = "",
+ .yellow = "",
+ .blue = "",
+ .magenta = "",
+ .cyan = "",
+ .reset = "" };
+
+void
+enable_colors(int enable)
+{
+ if (enable)
+ colors = &color_palette_default;
+ else
+ colors = &color_palette_bw;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+#ifndef _L2REFLECT_COLORS_H_
+#define _L2REFLECT_COLORS_H_
+
+/* posix terminal colors */
+struct color_palette {
+ const char *red, *green, *yellow, *blue, *magenta, *cyan, *reset;
+};
+
+/* ptr to the current tui color palette */
+extern const struct color_palette *colors;
+
+/* disable colored output */
+void
+enable_colors(int enable);
+
+#endif /* _L2REFLECT_COLORS_H_ */
new file mode 100644
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+
+#include <stdatomic.h>
+
+#ifndef _L2REFLECT_L2REFLECT_H_
+#define _L2REFLECT_L2REFLECT_H_
+
+#define RTE_LOGTYPE_L2REFLECT RTE_LOGTYPE_USER1
+
+/* max size that common 1G NICs support */
+#define MAX_JUMBO_PKT_LEN 9600
+
+/* Used to compare MAC addresses. */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFull
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+enum {
+ TRACE_TYPE_DATA,
+ TRACE_TYPE_HELO,
+ TRACE_TYPE_EHLO,
+ TRACE_TYPE_RSET,
+ TRACE_TYPE_QUIT,
+};
+
+enum STATE {
+ /* elect the initial sender */
+ S_ELECT_LEADER = 1,
+ /* reset the counters */
+ S_RESET_TRX = 2,
+ /* measurement S_RUNNING */
+ S_RUNNING = 4,
+ /* terminated by local event */
+ S_LOCAL_TERM = 8,
+ /* terminated by remote event */
+ S_REMOTE_TERM = 16
+};
+
+extern int l2reflect_hist;
+extern unsigned int l2reflect_hist_buckets;
+extern atomic_int l2reflect_output_hist;
+extern int l2reflect_interrupt;
+extern uint64_t l2reflect_sleep_msec;
+extern uint64_t l2reflect_pkt_bytes;
+extern uint16_t l2reflect_port_number;
+extern atomic_int l2reflect_state;
+extern struct rte_ether_addr l2reflect_port_eth_addr;
+extern struct rte_ether_addr l2reflect_remote_eth_addr;
+
+#endif /* _L2REFLECT_L2REFLECT_H_ */
new file mode 100644
@@ -0,0 +1,1007 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ *
+ * The l2reflect application implements a ping-pong benchmark to
+ * measure the latency between two instances. For communication,
+ * we use raw ethernet and send one packet at a time. The timing data
+ * is collected locally and min/max/avg values are displayed in a TUI.
+ * Finally, a histogram of the latencies is printed which can be
+ * further processed with the jitterdebugger visualization scripts.
+ * To debug latency spikes, a max threshold can be defined.
+ * If it is hit, a trace point is created on both instances.
+ *
+ * Examples:
+ * launch (non-rt kernel): l2reflect --lcores 0@0,1@6 -n 1
+ * launch (rt kernel): l2reflect --lcores 0@0,1@6 -n 1 -- -P 50 -r -l
+ *
+ * For histogram data, launch with -H <usec> -F <output file>, e.g.
+ * -H 10 -F histogram.json for a histogram with 10 usec buckets which
+ * is written to a histogram.json file. This file can then be visualized
+ * using the jitterdebugger plotting scripts:
+ * jitterplot hist histogram.json
+ *
+ * While the application is running, it can be controlled by sending
+ * signals to one of the processes:
+ * - SIGUSR1: reset the min/max/avg on both instances
+ * - SIGUSR2: output / write the current histogram
+ * - SIGHUP/SIGINT: gracefully terminate both instances
+ *
+ * Note on wiring:
+ * The l2reflect application sends the packets via a physical ethernet
+ * interface. When running both instances on a single system, at least
+ * two dedicated physical ports and a (physical) loopback between them
+ * is required.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <time.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <sys/signal.h>
+#include <assert.h>
+#include <unistd.h>
+#ifdef HAS_SYS_IO
+#include <sys/io.h>
+#endif
+#include <sched.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_trace.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include "l2reflect.h"
+#include "payload.h"
+#include "utils.h"
+#include "colors.h"
+#include "stats.h"
+
+#define NSEC_PER_SEC 1000000000
+
+#define NB_MBUF 2047
+
+#define MAX_PKT_BURST 32
+/* warmup a few round before starting the measurement */
+#define WARMUP_ROUNDS 42
+
+/* break after one second */
+#define DEFAULT_BREAKVAL_USEC 1000000ull
+/* break if no rx for more than this rounds */
+#define RX_TIMEOUT_MASK ~0xFFFFFull
+
+/* delay between two election packets */
+#define DELAY_ELECTION_MS 500
+
+int l2reflect_hist;
+unsigned int l2reflect_hist_buckets = HIST_NUM_BUCKETS_DEFAULT;
+atomic_int l2reflect_output_hist;
+int l2reflect_fake_mac;
+int l2reflect_interrupt;
+uint64_t l2reflect_sleep_msec;
+uint64_t l2reflect_pkt_bytes = 64;
+uint16_t l2reflect_port_number;
+atomic_int l2reflect_state;
+struct rte_ether_addr l2reflect_port_eth_addr;
+struct rte_ether_addr l2reflect_remote_eth_addr;
+
+static struct timespec last_sent, last_recv;
+static int quiet, disable_int, priority, policy, l2reflect_mlock;
+
+static atomic_int sleep_start;
+static uint64_t rounds;
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 128
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ },
+ .txmode = {
+ .mq_mode = RTE_ETH_MQ_TX_NONE,
+ },
+};
+
+static uint32_t l2reflect_q;
+static uint64_t l2reflect_break_usec = DEFAULT_BREAKVAL_USEC;
+
+static struct rte_ether_addr ether_bcast_addr = {
+ .addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+};
+
+struct rte_mempool *l2reflect_pktmbuf_pool;
+
+static void
+l2reflect_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- [-p PORT] -P [PRIO] [-b USEC] [-n SIZE] [-r] [-f] [-l]"
+ "[-q] [-d] [-H USEC] [-B NUM] [-F FILE] [-S] [-i MSEC] [-m] [-c] [-h]\n"
+ " -p PORT: port to configure\n"
+ " -P PRIO: scheduling priority to use\n"
+ " -b USEC: break when latency > USEC\n"
+ " -n SIZE: size of packet in bytes [%i,%i]\n"
+ " (when using jumbo frames, sender and receiver values have to match)\n"
+ " -r: scheduling policy SCHED_RR\n"
+ " -f: scheduling policy SCHED_FIFO\n"
+ " -l: lock memory (mlockall)\n"
+ " -q: quiet, do not print stats\n"
+#ifdef HAS_SYS_IO
+ " -d: ignore maskable interrupts\n"
+#endif
+ " -H USEC: create histogram of latencies with USEC time slices\n"
+ " -B NUM: number of histogram buckets\n"
+ " -F FILE: write histogram to file\n"
+ " -S: start processing threads in sleep, wake with SIGCONT\n"
+ " -i MSEC: use interrupts instead of polling (cont. on interrupt or after MSEC)\n"
+ " -m: fake the source mac addr by adding 1 to the last tuple\n"
+ " -c: disable colored output\n"
+ " -h: display help message\n",
+ prgname, RTE_ETHER_MIN_LEN, MAX_JUMBO_PKT_LEN);
+}
+
+static int
+check_opts_for_help(int argc, char **argv, void(*display_help)(const char *))
+{
+ if (argc > 2 && !strncmp(argv[1], "--", 3)) {
+ if (!strncmp(argv[2], "-h", 3) || !strncmp(argv[2], "--help", 7)) {
+ display_help(argv[0]);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2reflect_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ int opt_colors = 1;
+ char *prgname = argv[0];
+ static struct option lgopts[] = { { NULL, 0, 0, 0 } };
+
+ argvopt = argv;
+ policy = SCHED_OTHER;
+ hist_filename = NULL;
+ l2reflect_output_hist = 0;
+
+ while ((opt = getopt_long(argc, argvopt, "p:P:b:H:B:F:i:n:qdrflScm", lgopts,
+ &option_index)) != EOF) {
+ switch (opt) {
+ /* port */
+ case 'p':
+ l2reflect_port_number =
+ (uint16_t)strtoul(optarg, NULL, 10);
+ break;
+ case 'P':
+ priority = strtoul(optarg, NULL, 10);
+ if (priority > 0) {
+ if (policy == SCHED_OTHER)
+ policy = SCHED_RR;
+ l2reflect_mlock = 1;
+ }
+ break;
+ case 'b':
+ l2reflect_break_usec = strtoul(optarg, NULL, 10);
+ break;
+ case 'S':
+ sleep_start = 1;
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ case 'd':
+ disable_int = 1;
+ break;
+ case 'r':
+ policy = SCHED_RR;
+ break;
+ case 'f':
+ policy = SCHED_FIFO;
+ break;
+ case 'l':
+ l2reflect_mlock = 1;
+ break;
+ case 'H':
+ l2reflect_hist = 1;
+ hist_bucket_usec = strtoul(optarg, NULL, 10);
+#ifndef RTE_HAS_JANSSON
+ printf("not compiled with cjson support\n");
+ return -1;
+#endif
+ break;
+ case 'B':
+ l2reflect_hist_buckets = strtoul(optarg, NULL, 10);
+ break;
+ case 'F':
+ hist_filename = strndup(optarg, 128);
+ break;
+ case 'i':
+ l2reflect_interrupt = 1;
+ l2reflect_sleep_msec = strtoul(optarg, NULL, 10);
+ break;
+ case 'n':
+ l2reflect_pkt_bytes = strtoull(optarg, NULL, 10);
+ if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
+ l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN) {
+ printf("packet size %" PRIu64 " not valid\n", l2reflect_pkt_bytes);
+ return -1;
+ }
+ if (l2reflect_pkt_bytes > RTE_MBUF_DEFAULT_DATAROOM) {
+ printf("NOT IMPLEMENTED. Packet size %" PRIu64 " requires segmented buffers.\n",
+ l2reflect_pkt_bytes);
+ return -1;
+ }
+ break;
+ case 'c':
+ opt_colors = 0;
+ break;
+ case 'm':
+ l2reflect_fake_mac = 1;
+ break;
+ default:
+ l2reflect_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind - 1] = prgname;
+
+ if (hist_filename && !l2reflect_hist) {
+ printf("-F switch requires -H switch as well\n");
+ return -1;
+ }
+
+ /* output is redirected, disable coloring */
+ if (!isatty(fileno(stdout)))
+ opt_colors = 0;
+
+ enable_colors(opt_colors);
+
+ ret = optind - 1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Send a burst of one packet */
+static inline int
+l2reflect_send_packet(struct rte_mbuf **m, uint16_t port)
+{
+ unsigned int ret;
+ struct rte_ether_hdr *eth;
+ struct my_magic_packet *pkt;
+ uint16_t type;
+
+ eth = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *);
+ pkt = (struct my_magic_packet *)eth;
+ type = pkt->type;
+
+ if (likely(type == TRACE_TYPE_DATA))
+ clock_gettime(CLOCK_MONOTONIC, &last_sent);
+ ret = rte_eth_tx_burst(port, l2reflect_q, m, 1);
+ if (unlikely(ret < 1))
+ rte_pktmbuf_free(*m);
+ return 0;
+}
+
+static inline void
+l2reflect_simple_forward(struct rte_mbuf *m)
+{
+ struct rte_ether_hdr *eth;
+ struct my_magic_packet *pkt;
+
+ eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ pkt = (struct my_magic_packet *)eth;
+
+ /* dst addr */
+ rte_ether_addr_copy(ð->src_addr, ð->dst_addr);
+ /* src addr */
+ rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
+
+ if (unlikely(pkt->magic == MAGIC_TRACE_PAYLOAD))
+ rte_eal_trace_generic_str("sending traced packet");
+
+ l2reflect_send_packet(&m, l2reflect_port_number);
+}
+
+static struct rte_mbuf *
+l2reflect_new_pkt(unsigned int type)
+{
+ struct rte_mbuf *m;
+ struct rte_ether_hdr *eth;
+ struct my_magic_packet *pkt;
+ uint64_t frame_bytes = RTE_ETHER_MIN_LEN;
+
+ m = rte_pktmbuf_alloc(l2reflect_pktmbuf_pool);
+ if (m == NULL)
+ rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc failed\n");
+
+ eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+
+ if (type == TRACE_TYPE_DATA)
+ frame_bytes = l2reflect_pkt_bytes;
+
+ /* zero out packet to make dumps better readable */
+ memset(eth, 0, frame_bytes - RTE_ETHER_CRC_LEN);
+
+ if (type == TRACE_TYPE_HELO)
+ rte_ether_addr_copy(ðer_bcast_addr, ð->dst_addr);
+ else
+ rte_ether_addr_copy(&l2reflect_remote_eth_addr, ð->dst_addr);
+
+ /* src addr */
+ rte_ether_addr_copy(&l2reflect_port_eth_addr, ð->src_addr);
+ eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT);
+
+ m->data_len = frame_bytes - RTE_ETHER_CRC_LEN;
+ m->pkt_len = frame_bytes - RTE_ETHER_CRC_LEN;
+
+ pkt = (struct my_magic_packet *)eth;
+ pkt->type = type;
+ pkt->breakval = l2reflect_break_usec;
+ pkt->req_pkt_bytes = l2reflect_pkt_bytes;
+
+ return m;
+}
+
+static void
+l2reflect_send_reset(void)
+{
+ struct rte_mbuf *m;
+ m = l2reflect_new_pkt(TRACE_TYPE_RSET);
+ l2reflect_send_packet(&m, l2reflect_port_number);
+}
+
+static void
+l2reflect_send_quit(void)
+{
+ struct rte_mbuf *m;
+ m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
+ l2reflect_send_packet(&m, l2reflect_port_number);
+}
+
+static void
+l2reflect_new_ball(void)
+{
+ struct rte_mbuf *pnewball;
+ struct rte_ether_hdr *eth;
+ struct my_magic_packet *pkt;
+ char mac_src_str[RTE_ETHER_ADDR_FMT_SIZE];
+ char mac_dst_str[RTE_ETHER_ADDR_FMT_SIZE];
+
+ RTE_LOG(INFO, L2REFLECT, "Should create a packet to play with ...\n");
+ pnewball = l2reflect_new_pkt(TRACE_TYPE_DATA);
+
+ eth = rte_pktmbuf_mtod(pnewball, struct rte_ether_hdr *);
+
+ rte_ether_format_addr(mac_src_str, sizeof(mac_src_str), &l2reflect_port_eth_addr);
+ rte_ether_format_addr(mac_dst_str, sizeof(mac_dst_str), &l2reflect_remote_eth_addr);
+ RTE_LOG(INFO, L2REFLECT, "from MAC address: %s to %s\n\n", mac_src_str, mac_dst_str);
+
+ pkt = (struct my_magic_packet *)eth;
+
+ /* we are tracing lets tell the others */
+ if (l2reflect_break_usec)
+ pkt->magic = MAGIC_TRACE_PAYLOAD;
+
+ l2reflect_send_packet(&pnewball, l2reflect_port_number);
+}
+
+static inline int64_t
+calcdiff_ns(struct timespec t1, struct timespec t2)
+{
+ int64_t diff;
+ diff = NSEC_PER_SEC * (int64_t)((int)t1.tv_sec - (int)t2.tv_sec);
+ diff += ((int)t1.tv_nsec - (int)t2.tv_nsec);
+ return diff;
+}
+
+/* filter the received packets for actual l2reflect messages */
+static inline unsigned int
+l2reflect_rx_filter(
+ struct rte_mbuf *buf)
+{
+ struct rte_ether_hdr *eth;
+ eth = rte_pktmbuf_mtod(buf, struct rte_ether_hdr *);
+
+ if (unlikely(buf->nb_segs > 1))
+ RTE_LOG(WARNING, L2REFLECT, "Segmented packet: data-len: %i, pkt-len: %i, #seg: %i\n",
+ buf->data_len, buf->pkt_len, buf->nb_segs);
+
+ /* check for the l2reflect ether type */
+ if (unlikely(eth->ether_type != rte_cpu_to_be_16(ETHER_TYPE_L2REFLECT)))
+ return 0;
+
+ /*
+ * if the packet is not from our partner
+ * (and we already have a partner), drop it
+ */
+ if (unlikely(l2reflect_state != S_ELECT_LEADER &&
+ !rte_is_same_ether_addr(ð->src_addr, &l2reflect_remote_eth_addr)))
+ return 0;
+
+ /* filter bounce-back packets */
+ if (unlikely(rte_is_same_ether_addr(ð->src_addr, &l2reflect_port_eth_addr)))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * automatically elect the leader of the benchmark by
+ * sending out HELO packets and waiting for responses.
+ * On response, the mac addresses are compared and the
+ * numerically larger one becomes the leader.
+ */
+static int
+elect_leader(uint16_t portid)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ struct rte_ether_hdr *eth;
+ struct rte_ether_addr *src_addr;
+ struct rte_eth_dev_info dev_info;
+ struct my_magic_packet *pkt;
+ unsigned int i, nb_rx;
+ int ehlo_send = 0;
+ int i_win;
+
+ while (l2reflect_state == S_ELECT_LEADER) {
+ /* send a packet to make sure the MAC addr of this interface is publicly known */
+ m = l2reflect_new_pkt(TRACE_TYPE_HELO);
+ RTE_LOG(INFO, L2REFLECT, "looking for player HELO\n");
+ l2reflect_send_packet(&m, l2reflect_port_number);
+ rte_delay_ms(DELAY_ELECTION_MS);
+
+ /* receive election packets */
+ nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
+ MAX_PKT_BURST);
+
+ /* note: do not short-circuit as otherwise the mbufs are not freed */
+ for (i = 0; i < nb_rx; i++) {
+ m = pkts_burst[i];
+ eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ src_addr = ð->src_addr;
+ pkt = (struct my_magic_packet *)eth;
+
+ if (!l2reflect_rx_filter(m)) {
+ rte_pktmbuf_free(m);
+ continue;
+ }
+
+ if (pkt->type == TRACE_TYPE_EHLO && l2reflect_state == S_ELECT_LEADER) {
+ /* check if both modes are equal */
+ if (((l2reflect_pkt_bytes <= RTE_ETHER_MTU)
+ != (pkt->req_pkt_bytes <= RTE_ETHER_MTU))) {
+ l2reflect_state = S_LOCAL_TERM;
+ m = l2reflect_new_pkt(TRACE_TYPE_QUIT);
+ l2reflect_send_packet(&m, l2reflect_port_number);
+ rte_exit(EXIT_FAILURE,
+ "remote and local jumbo config does not match "
+ "(%" PRIu64 " vs %" PRIu64 ")\n",
+ l2reflect_pkt_bytes, pkt->req_pkt_bytes);
+ }
+ if (l2reflect_pkt_bytes != pkt->req_pkt_bytes) {
+ l2reflect_pkt_bytes = MAX(l2reflect_pkt_bytes,
+ pkt->req_pkt_bytes);
+ rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
+ const uint32_t overhead_len = eth_dev_get_overhead_len(
+ dev_info.max_rx_pktlen,
+ dev_info.max_mtu);
+ const uint16_t mtu = MAX(l2reflect_pkt_bytes - overhead_len,
+ dev_info.min_mtu);
+ RTE_LOG(INFO, L2REFLECT,
+ "update frame sizes: frame: %" PRIu64 ", MTU %d\n",
+ l2reflect_pkt_bytes, mtu);
+ const int ret = rte_eth_dev_set_mtu(
+ l2reflect_port_number,
+ mtu);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "failed to update MTU: %s\n",
+ strerror(-ret));
+ }
+
+ if (ehlo_send) {
+ l2reflect_state = S_RUNNING;
+ RTE_LOG(INFO, L2REFLECT, "Enter running state\n");
+ }
+ }
+ /* we got a HELO packet, respond with EHLO */
+ if (pkt->type == TRACE_TYPE_HELO) {
+ char mac_str_other[RTE_ETHER_ADDR_FMT_SIZE];
+ rte_ether_addr_copy(src_addr, &l2reflect_remote_eth_addr);
+ m = l2reflect_new_pkt(TRACE_TYPE_EHLO);
+ rte_ether_format_addr(
+ mac_str_other, sizeof(mac_str_other), &l2reflect_remote_eth_addr);
+ RTE_LOG(INFO, L2REFLECT, "found one HELO from %s\n", mac_str_other);
+ l2reflect_send_packet(&m, l2reflect_port_number);
+ ehlo_send = 1;
+ }
+ rte_pktmbuf_free(m);
+ }
+ }
+
+ if (rte_is_same_ether_addr(&l2reflect_port_eth_addr, &l2reflect_remote_eth_addr))
+ rte_exit(EXIT_FAILURE, "talking to myself ... confused\n");
+
+ /* the one with the bigger MAC is the leader */
+ i_win = ((*((uint64_t *)&l2reflect_port_eth_addr) & MAC_ADDR_CMP) >
+ (*((uint64_t *)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));
+
+ RTE_LOG(INFO, L2REFLECT, "i am the \"%s\"\n", i_win ? "rick" : "morty");
+
+ return i_win;
+}
+
+/*
+ * add the measured time diff to the statistics.
+ * return false if threshold is hit
+ */
+static inline int
+add_to_record(const uint64_t diff)
+{
+ record.rounds++;
+ /* do not count the first rounds, diff would be too high */
+ if (record.rounds < WARMUP_ROUNDS)
+ return 1;
+
+ if (l2reflect_hist) {
+ const uint64_t bucket =
+ MIN(diff / (hist_bucket_usec * 1000), l2reflect_hist_buckets-1);
+ record.hist[bucket]++;
+ }
+
+ record.avg_round_ns += (double)diff;
+ if (diff < record.min_round_ns)
+ record.min_round_ns = diff;
+ if (diff > record.max_round_ns) {
+ record.max_round_ns = diff;
+ if (l2reflect_break_usec &&
+ (record.max_round_ns > (l2reflect_break_usec * 1000)))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * process a single packet.
+ * return false if latency threshold is hit
+ */
+static inline int
+process_packet(
+ struct my_magic_packet *pkt,
+ struct timespec *rx_time,
+ uint64_t *diff)
+{
+ if (pkt->type == TRACE_TYPE_DATA) {
+ rte_memcpy(&last_recv, rx_time, sizeof(*rx_time));
+ *diff = calcdiff_ns(last_recv, last_sent);
+ if (!unlikely(add_to_record(*diff))) {
+ /* TODO: improve tracing */
+ rte_eal_trace_generic_u64(record.max_round_ns / 1000);
+ return 0;
+ }
+ }
+ if (pkt->magic == MAGIC_TRACE_PAYLOAD)
+ rte_eal_trace_generic_str("received traced packet");
+
+ return 1;
+}
+
+/*
+ * free all packet buffers in the range [begin, end[.
+ */
+static void
+free_pktbufs(
+ struct rte_mbuf **bufs,
+ int begin,
+ int end)
+{
+ int i = begin;
+ for (; i < end; i++)
+ rte_pktmbuf_free(bufs[0]);
+}
+
+/*
+ * return 1 in case the ball was lost (cheap check)
+ */
+static inline void
+check_ball_lost(const uint64_t dp_idle) {
+ /* only check if we are in running state and have a breakval */
+ if (unlikely(dp_idle & RX_TIMEOUT_MASK) &&
+ l2reflect_state == S_RUNNING &&
+ l2reflect_break_usec &&
+ record.rounds > WARMUP_ROUNDS) {
+ RTE_LOG(INFO, L2REFLECT, "lost ball after %" PRIu64 " rounds\n", record.rounds);
+ l2reflect_state = S_LOCAL_TERM;
+ }
+}
+
+/* main processing loop */
+static void
+l2reflect_main_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ unsigned int lcore_id;
+ unsigned int j, nb_rx, nb_evt;
+ uint16_t portid;
+ /* number of consequent idle passes */
+ uint64_t dp_idle = 0;
+ uint64_t diff = 0;
+ int sender;
+ struct my_magic_packet *pkt;
+ struct rte_ether_hdr *eth;
+ struct rte_epoll_event event;
+
+ lcore_id = rte_lcore_id();
+
+ RTE_LOG(INFO, L2REFLECT, "entering main loop on lcore %u\n", lcore_id);
+
+ portid = l2reflect_port_number;
+ RTE_LOG(INFO, L2REFLECT, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ assert_link_status(portid);
+
+restart:
+ init_record();
+ rte_eth_stats_reset(portid);
+ l2reflect_state = S_ELECT_LEADER;
+ sender = elect_leader(portid);
+
+ if (l2reflect_break_usec)
+ rte_eal_trace_generic_str("hit breakval");
+
+ /* the leader election implements a latch (half-barrier).
+ * To ensure that the other party is in running state, we
+ * have to wait at least a full election period
+ */
+ rte_delay_ms(DELAY_ELECTION_MS * 2);
+
+ /* we are the sender so we bring one ball into the game */
+ if (sender)
+ l2reflect_new_ball();
+
+ /* reset the record */
+ init_record();
+ while (l2reflect_state == S_RUNNING) {
+ struct timespec rx_time;
+
+ if (l2reflect_interrupt) {
+ rte_eth_dev_rx_intr_enable(portid, l2reflect_q);
+ /* wait for interrupt or timeout */
+ nb_evt = rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1,
+ l2reflect_sleep_msec);
+ rte_eth_dev_rx_intr_disable(portid, l2reflect_q);
+ if (nb_evt == 0 && rounds > WARMUP_ROUNDS)
+ ++record.timeouts;
+ }
+
+ nb_rx = rte_eth_rx_burst(portid, l2reflect_q, pkts_burst,
+ MAX_PKT_BURST);
+
+ if (nb_rx) {
+ clock_gettime(CLOCK_MONOTONIC, &rx_time);
+ dp_idle = 0;
+ } else
+ ++dp_idle;
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ pkt = (struct my_magic_packet *)eth;
+
+ rte_prefetch0(eth);
+
+ if (unlikely(!l2reflect_rx_filter(m))) {
+ rte_pktmbuf_free(m);
+ continue;
+ }
+
+ /* remote is telling us to reset or stop */
+ if (unlikely(pkt->type == TRACE_TYPE_RSET)) {
+ free_pktbufs(pkts_burst, j, nb_rx);
+ goto restart;
+ }
+ if (unlikely(pkt->type == TRACE_TYPE_QUIT)) {
+ l2reflect_state = S_REMOTE_TERM;
+ free_pktbufs(pkts_burst, j, nb_rx);
+ break;
+ }
+
+ if (likely(l2reflect_state == S_RUNNING)) {
+ if (unlikely(!process_packet(pkt, &rx_time, &diff))) {
+ l2reflect_state = S_LOCAL_TERM;
+ free_pktbufs(pkts_burst, j, nb_rx);
+ break;
+ }
+ l2reflect_simple_forward(m);
+ }
+ }
+ check_ball_lost(dp_idle);
+ }
+
+ const int state_cpy = l2reflect_state;
+ switch (state_cpy) {
+ case S_RESET_TRX:
+ l2reflect_send_reset();
+ l2reflect_state = S_ELECT_LEADER;
+ /* fallthrough */
+ case S_ELECT_LEADER:
+ goto restart;
+ }
+
+ if (state_cpy == S_LOCAL_TERM) {
+ rte_eal_trace_generic_str("local termination");
+ l2reflect_send_quit();
+ } else if (state_cpy == S_REMOTE_TERM) {
+ RTE_LOG(INFO, L2REFLECT, "received message that remote hit threshold (or is cancelled)\n");
+ }
+}
+
+static int
+l2reflect_launch_one_lcore(__rte_unused void *dummy)
+{
+ struct sched_param param;
+ int err;
+
+ if (sleep_start) {
+ RTE_LOG(INFO, L2REFLECT, "Sleeping and waiting for SIGCONT\n");
+ while (sleep_start) {
+ rte_delay_ms(10);
+ if (l2reflect_state == S_LOCAL_TERM)
+ rte_exit(EXIT_SUCCESS, "Quit\n");
+ }
+ RTE_LOG(INFO, L2REFLECT, "Got SIGCONT, continuing");
+ }
+ if (l2reflect_mlock) {
+ err = mlockall(MCL_CURRENT | MCL_FUTURE);
+ if (err)
+ rte_exit(EXIT_FAILURE, "mlockall failed: %s\n",
+ strerror(errno));
+ }
+ if (priority > 0 || policy != SCHED_OTHER) {
+ memset(¶m, 0, sizeof(param));
+ param.sched_priority = priority;
+ err = sched_setscheduler(0, policy, ¶m);
+ if (err)
+ rte_exit(EXIT_FAILURE,
+ "sched_setscheduler failed: %s\n",
+ strerror(errno));
+ }
+ if (l2reflect_interrupt) {
+ err = rte_eth_dev_rx_intr_ctl_q(l2reflect_port_number,
+ l2reflect_q,
+ RTE_EPOLL_PER_THREAD,
+ RTE_INTR_EVENT_ADD, NULL);
+ if (err)
+ rte_exit(EXIT_FAILURE,
+ "could not register I/O interrupt\n");
+ }
+ l2reflect_main_loop();
+ return 0;
+}
+
+static void
+sig_handler(int signum)
+{
+ switch (signum) {
+ case SIGUSR1:
+ if (l2reflect_state == S_RUNNING)
+ l2reflect_state = S_RESET_TRX;
+ break;
+ case SIGUSR2:
+ l2reflect_output_hist = 1;
+ break;
+ case SIGCONT:
+ sleep_start = 0;
+ break;
+ case SIGHUP:
+ case SIGINT:
+ l2reflect_state = S_LOCAL_TERM;
+ break;
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf txconf;
+ int ret;
+ uint32_t i;
+ uint16_t nb_ports;
+ unsigned int lcore_id;
+ struct sigaction action;
+ bzero(&action, sizeof(action));
+ char mempool_name[128];
+ char mac_str[RTE_ETHER_ADDR_FMT_SIZE];
+
+ action.sa_handler = sig_handler;
+ if (sigaction(SIGHUP, &action, NULL) < 0 ||
+ sigaction(SIGUSR1, &action, NULL) < 0 ||
+ sigaction(SIGUSR2, &action, NULL) < 0 ||
+ sigaction(SIGCONT, &action, NULL) < 0 ||
+ sigaction(SIGINT, &action, NULL) < 0) {
+ rte_exit(EXIT_FAILURE, "Could not register signal handler\n");
+ }
+
+ lcore_id = rte_lcore_id();
+
+ if (check_opts_for_help(argc, argv, l2reflect_usage))
+ return 0;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2reflect_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2REFLECT arguments\n");
+
+ snprintf(mempool_name, sizeof(mempool_name), "mbuf_pool_%d", getpid());
+ RTE_LOG(DEBUG, L2REFLECT, "About to create mempool \"%s\"\n", mempool_name);
+ /* create the mbuf pool */
+ l2reflect_pktmbuf_pool =
+ rte_pktmbuf_pool_create(mempool_name, NB_MBUF,
+ MAX_PKT_BURST, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_socket_id());
+
+ if (l2reflect_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Cannot init/find mbuf pool name %s\nError: %d %s\n",
+ mempool_name, rte_errno, rte_strerror(rte_errno));
+
+ nb_ports = rte_eth_dev_count_avail();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+ if (l2reflect_port_number + 1 > nb_ports)
+ rte_exit(EXIT_FAILURE, "Chosen port %d does not exist - bye\n",
+ l2reflect_port_number);
+ RTE_LOG(INFO, L2REFLECT, "We have %d ports and will use port %d\n", nb_ports,
+ l2reflect_port_number);
+
+ rte_eth_dev_info_get(l2reflect_port_number, &dev_info);
+ RTE_LOG(INFO, L2REFLECT, "Initializing port %u ...\n", l2reflect_port_number);
+ fflush(stdout);
+
+ if (l2reflect_interrupt)
+ port_conf.intr_conf.rxq = 1;
+
+ ret = config_port_max_pkt_len(&port_conf, &dev_info);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Invalid max packet length: %u (port %u)\n",
+ l2reflect_port_number, l2reflect_port_number);
+
+ ret = rte_eth_dev_configure(l2reflect_port_number, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot configure device: err=%s, port=%u\n",
+ strerror(-ret), l2reflect_port_number);
+
+ ret = rte_eth_dev_adjust_nb_rx_tx_desc(l2reflect_port_number, &nb_rxd, &nb_txd);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot adjust # of Rx/Tx descriptors to HW limits: err=%s, port=%u\n",
+ strerror(-ret), l2reflect_port_number);
+
+ /* init RX queues */
+ for (i = 0; i <= l2reflect_q; i++) {
+ ret = rte_eth_rx_queue_setup(
+ l2reflect_port_number, i, nb_rxd,
+ rte_eth_dev_socket_id(l2reflect_port_number), NULL,
+ l2reflect_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(
+ EXIT_FAILURE,
+ "rte_eth_rx_queue_setup:err=%s, port=%u q=%u\n",
+ strerror(-ret), l2reflect_port_number, i);
+ }
+
+ /* init one TX queue on each port */
+ txconf = dev_info.default_txconf;
+ txconf.offloads = port_conf.txmode.offloads;
+ ret = rte_eth_tx_queue_setup(
+ l2reflect_port_number, 0, nb_txd,
+ rte_eth_dev_socket_id(l2reflect_port_number), &txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup:err=%s, port=%u\n",
+ strerror(-ret), (unsigned int)l2reflect_port_number);
+
+ /* Start device */
+ ret = rte_eth_dev_start(l2reflect_port_number);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%s, port=%u\n",
+ strerror(-ret), (unsigned int)l2reflect_port_number);
+
+ rte_eth_macaddr_get(l2reflect_port_number, &l2reflect_port_eth_addr);
+
+ /*
+ * When running on a Tap device, we might want to use a foreign
+ * mac address to make sure that the application and the Tap device
+ * do not share the same MAC addr. By that, we enforce that the
+ * bridge learns this address and correctly forwards unicast packets.
+ */
+ if (l2reflect_fake_mac)
+ l2reflect_port_eth_addr.addr_bytes[5] += 1;
+
+ rte_ether_format_addr(mac_str, sizeof(mac_str),
+ &l2reflect_port_eth_addr);
+ RTE_LOG(INFO, L2REFLECT, "Port %u, MAC address: %s\n\n",
+ (unsigned int)l2reflect_port_number, mac_str);
+
+ /*
+ * in quiet mode the primary executes the main packet loop
+ * otherwise the one worker does it and the primary prints stats
+ */
+ if (quiet) {
+ assert(rte_lcore_count() == 1);
+#ifdef HAS_SYS_IO
+ if (disable_int) {
+ iopl(3);
+ asm("cli");
+ }
+#endif
+ RTE_LOG(INFO, L2REFLECT, "PID %i, Parent %i\n", getpid(), getppid());
+ l2reflect_launch_one_lcore(NULL);
+ } else {
+ assert(rte_lcore_count() == 2);
+ /* the worker reflects the packets */
+ RTE_LCORE_FOREACH_WORKER(lcore_id)
+ {
+ rte_eal_remote_launch(l2reflect_launch_one_lcore, NULL,
+ lcore_id);
+ }
+
+ /* the primary prints the stats */
+ init_record();
+ l2reflect_stats_loop();
+ rte_eal_mp_wait_lcore();
+ }
+ rte_eal_cleanup();
+
+ if (l2reflect_hist)
+ output_histogram_snapshot();
+
+ cleanup_record();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Siemens AG
+
+cc = meson.get_compiler('c')
+
+jansson = dependency('libjansson', required: false)
+
+if not jansson.found()
+ jansson = cc.find_library('jansson', required: false)
+endif
+
+if cc.has_header('sys/io.h')
+ cflags += '-DHAS_SYS_IO'
+endif
+
+sources = files('main.c', 'utils.c', 'stats.c', 'colors.c')
+deps += ['ethdev']
+if jansson.found()
+ ext_deps += jansson
+ cflags += '-DRTE_HAS_JANSSON'
+endif
new file mode 100644
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Siemens AG
+ */
+#include <rte_ether.h>
+
+#ifndef _L2REFLECT_PAYLOAD_H_
+#define _L2REFLECT_PAYLOAD_H_
+
+#define MAGIC_TRACE_PAYLOAD 0xd00faffeaffed00full
+/* IEEE Std 802 - Local Experimental Ethertype */
+#define ETHER_TYPE_L2REFLECT 0x88B5
+
+struct my_magic_packet {
+ /* l2 packet header */
+ struct rte_ether_hdr eth;
+ /* type of the l2reflect packet */
+ uint8_t type;
+ /* magic easy-to-spot pattern for tracing */
+ uint64_t magic;
+ /* break if latency is larger than this */
+ uint64_t breakval;
+ /* intended size of the packet */
+ uint64_t req_pkt_bytes;
+};
+
+#endif /* _L2REFLECT_PAYLOAD_H_ */
new file mode 100644
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#ifdef RTE_HAS_JANSSON
+#include <jansson.h>
+#endif
+#include "colors.h"
+#include "stats.h"
+
+unsigned int hist_bucket_usec;
+struct stats record;
+char *hist_filename;
+
+void
+init_record(void)
+{
+ record.max_round_ns = 0;
+ record.min_round_ns = MIN_INITIAL;
+ record.rounds = 0;
+ record.timeouts = 0;
+ record.avg_round_ns = 0;
+ if (l2reflect_hist) {
+ if (!record.hist_size) {
+ record.hist =
+ calloc(l2reflect_hist_buckets, sizeof(uint64_t));
+ record.hist_size = l2reflect_hist_buckets;
+ } else {
+ memset(record.hist, 0,
+ record.hist_size * sizeof(uint64_t));
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &record.time_start);
+}
+
+void
+cleanup_record(void)
+{
+ if (l2reflect_hist) {
+ free(record.hist);
+ record.hist = NULL;
+ record.hist_size = 0;
+ }
+}
+
+void
+output_histogram_snapshot(void)
+{
+ char *json = serialize_histogram(&record);
+ FILE *fd = stderr;
+ if (hist_filename)
+ fd = fopen(hist_filename, "w");
+ fputs(json, fd);
+ fputs("\n", fd);
+ free(json);
+ if (hist_filename)
+ fclose(fd);
+}
+
+void
+print_stats(void)
+{
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+ const uint64_t bytes_in_gib = 0x40000000;
+ struct rte_eth_stats stats;
+ char mac_str_me[RTE_ETHER_ADDR_FMT_SIZE];
+ char mac_str_remote[RTE_ETHER_ADDR_FMT_SIZE];
+ char timeout_bound_str[32];
+ const char *str_jumbo = l2reflect_pkt_bytes > RTE_ETHER_MTU ?
+ "jumbo" : "no-jumbo";
+ struct timespec time_now;
+
+ if (l2reflect_interrupt == 0)
+ snprintf(timeout_bound_str, sizeof(timeout_bound_str), " polling");
+ else if (l2reflect_sleep_msec == -1u)
+ snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">= infinite");
+ else
+ snprintf(timeout_bound_str, sizeof(timeout_bound_str), ">%9" PRIu64 "ms",
+ l2reflect_sleep_msec);
+
+ rte_eth_stats_get(l2reflect_port_number, &stats);
+ rte_ether_format_addr(mac_str_me, sizeof(mac_str_me),
+ &l2reflect_port_eth_addr);
+ rte_ether_format_addr(mac_str_remote, sizeof(mac_str_remote),
+ &l2reflect_remote_eth_addr);
+
+ clock_gettime(CLOCK_MONOTONIC, &time_now);
+ const uint64_t time_since_start = time_now.tv_sec - record.time_start.tv_sec;
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("%sNetworking Roundtrip Test%s\n", colors->green, colors->reset);
+ printf("\n%sPort statistics ====================================%s",
+ colors->magenta, colors->reset);
+
+ printf("\nMe: %s <--> Remote: %s", mac_str_me, mac_str_remote);
+ printf("\nStatistics for port %d PID %d on lcore %d ---------"
+ "\nState: %-16s %10" PRIu64 " s"
+ "\nPackets tx: %22" PRIu64 "\nPackets rx: %22" PRIu64
+ "\nBytes tx: %24" PRIu64 " (%8.2f GiB)"
+ "\nBytes rx: %24" PRIu64 " (%8.2f GiB)"
+ "\nErrors tx: %23" PRIu64 "\nErrors rx: %23" PRIu64
+ "\nTimeouts rx: %21" PRIu64 " (%s)"
+ "\nPacketsize [Byte]: %15" PRIu64 " (%12s)",
+ l2reflect_port_number, getpid(), rte_lcore_id(),
+ runstate_tostring(l2reflect_state),
+ time_since_start,
+ stats.opackets, stats.ipackets, stats.obytes,
+ (double)stats.obytes / bytes_in_gib, stats.ibytes,
+ (double)stats.ibytes / bytes_in_gib, stats.oerrors,
+ stats.ierrors, record.timeouts,
+ timeout_bound_str, l2reflect_pkt_bytes, str_jumbo);
+ printf("\n%sPort timing statistics =============================%s",
+ colors->magenta, colors->reset);
+ if (l2reflect_state == S_ELECT_LEADER ||
+ record.min_round_ns == MIN_INITIAL) {
+ printf("\n\nBenchmark not started yet...\n");
+ } else {
+ printf("\n%sMax%s roundtrip: %19" PRIu64 " us", colors->red,
+ colors->reset, record.max_round_ns / 1000);
+ printf("\n%sAvg%s roundtrip: %19" PRIu64 " us", colors->yellow,
+ colors->reset,
+ record.rounds ? (uint64_t)(record.avg_round_ns /
+ record.rounds / 1000) :
+ 0);
+ printf("\n%sMin%s roundtrip: %19" PRIu64 " us", colors->green,
+ colors->reset, record.min_round_ns / 1000);
+ }
+ printf("\n%s====================================================%s\n",
+ colors->magenta, colors->reset);
+}
+
+void
+l2reflect_stats_loop(void)
+{
+ while (!(l2reflect_state & (S_LOCAL_TERM | S_REMOTE_TERM))) {
+ print_stats();
+ if (l2reflect_hist && l2reflect_output_hist) {
+ output_histogram_snapshot();
+ l2reflect_output_hist = 0;
+ }
+ rte_delay_us_sleep(1000000);
+ }
+}
+
+char *
+serialize_histogram(__rte_unused const struct stats *record)
+{
+#ifndef RTE_HAS_JANSSON
+ return strdup("to print histogram, build with jansson support");
+#else
+ char *str = NULL;
+ char key[8];
+ unsigned int i;
+ json_t *hist0, *cpu0, *all_cpus, *output;
+
+ output = json_object();
+ /* version: 1 */
+ json_object_set_new(output, "version", json_integer(1));
+
+ /* cpu 0 histogram */
+ hist0 = json_object();
+ for (i = 0; i < record->hist_size; ++i) {
+ /* only log positive numbers to meet jitterplot format */
+ if (record->hist[i] != 0) {
+ snprintf(key, 8, "%u", i * hist_bucket_usec);
+ json_object_set(hist0, key,
+ json_integer(record->hist[i]));
+ }
+ }
+
+ /* in case of empty histogram, set these values to zero */
+ const json_int_t min_round_us =
+ record->rounds ? record->min_round_ns / 1000 : 0;
+ const json_int_t avg_round_us =
+ record->rounds ? record->avg_round_ns / record->rounds / 1000 : 0;
+ const json_int_t max_round_us =
+ record->rounds ? record->max_round_ns / 1000 : 0;
+
+ /* cpu 0 stats */
+ cpu0 = json_object();
+ json_object_set_new(cpu0, "histogram", hist0);
+ json_object_set_new(cpu0, "count", json_integer(record->rounds));
+ json_object_set_new(cpu0, "min", json_integer(min_round_us));
+ json_object_set_new(cpu0, "max", json_integer(max_round_us));
+ json_object_set_new(cpu0, "avg", json_integer(avg_round_us));
+
+ /* combine objects */
+ all_cpus = json_object();
+ json_object_set_new(all_cpus, "0", cpu0);
+ json_object_set_new(output, "cpu", all_cpus);
+
+ str = json_dumps(output, JSON_ENSURE_ASCII | JSON_INDENT(2));
+
+ /* cleanup */
+ json_decref(output);
+
+ return str;
+#endif
+}
+
+const char *
+runstate_tostring(int s)
+{
+ switch (s) {
+ case S_ELECT_LEADER:
+ return "Electing";
+ case S_RESET_TRX:
+ return "Resetting";
+ case S_RUNNING:
+ return "Running";
+ case S_LOCAL_TERM:
+ return "Term. local";
+ case S_REMOTE_TERM:
+ return "Term. remote";
+ default:
+ return "Preparing";
+ }
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+#ifndef _L2REFLECT_STATS_H_
+#define _L2REFLECT_STATS_H_
+#include <stdint.h>
+#include <stdatomic.h>
+#include <limits.h>
+
+#include <rte_ethdev.h>
+
+#include "l2reflect.h"
+
+#define MIN_INITIAL ULONG_MAX
+#define HIST_NUM_BUCKETS_DEFAULT 128
+
+/* runtime statistics */
+struct stats {
+ uint64_t max_round_ns;
+ uint64_t min_round_ns;
+ uint64_t rounds;
+ uint64_t timeouts;
+ double avg_round_ns;
+ unsigned int hist_size;
+ /* each slot is 10us */
+ uint64_t *hist;
+ struct timespec time_start;
+};
+
+/* size of each histogram bucket in usec */
+extern unsigned int hist_bucket_usec;
+extern struct stats record;
+extern char *hist_filename;
+
+void
+init_record(void);
+void
+cleanup_record(void);
+
+void
+l2reflect_stats_loop(void);
+
+/*
+ * Write the histogram to file / stdio without any locking.
+ * When called during the measurement, values are approximations
+ * (racy reads).
+ */
+void
+output_histogram_snapshot(void);
+
+/* Print out statistics on packets dropped */
+void
+print_stats(void);
+
+/*
+ * get a JSON representation of the record
+ */
+char *
+serialize_histogram(const struct stats *record);
+
+/*
+ * get a string representation of the current runstate
+ */
+const char *
+runstate_tostring(int s);
+
+#endif /* _L2REFLECT_STATS_H_ */
new file mode 100644
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Siemens AG
+ */
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_ethdev.h>
+
+#include "utils.h"
+#include "l2reflect.h"
+
+void
+assert_link_status(int port_id)
+{
+ struct rte_eth_link link;
+ uint8_t rep_cnt = MAX_REPEAT_TIMES;
+ int link_get_err = -EINVAL;
+
+ memset(&link, 0, sizeof(link));
+ do {
+ link_get_err = rte_eth_link_get_nowait(port_id, &link);
+ if (link_get_err == 0 && link.link_status == RTE_ETH_LINK_UP)
+ break;
+ rte_delay_ms(CHECK_INTERVAL);
+ RTE_LOG(INFO, L2REFLECT, "Link not ready yet, try again...\n");
+ } while (--rep_cnt && (l2reflect_state != S_LOCAL_TERM));
+
+ if (link_get_err < 0)
+ rte_exit(EXIT_FAILURE, "error: link get is failing: %s\n",
+ rte_strerror(-link_get_err));
+ if (link.link_status == RTE_ETH_LINK_DOWN)
+ rte_exit(EXIT_FAILURE, "error: link is still down\n");
+
+ const char *linkspeed_str = rte_eth_link_speed_to_str(link.link_speed);
+ RTE_LOG(INFO, L2REFLECT,
+ "Link status on port %d: speed: %s, duplex: %s\n",
+ port_id, linkspeed_str,
+ link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX ? "full" : "half");
+}
+
+uint32_t
+eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
+{
+ uint32_t overhead_len;
+ if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
+ overhead_len = max_rx_pktlen - max_mtu;
+ else
+ overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
+ return overhead_len;
+}
+
+int
+config_port_max_pkt_len(struct rte_eth_conf *conf,
+ struct rte_eth_dev_info *dev_info)
+{
+ uint32_t overhead_len;
+ if (l2reflect_pkt_bytes < RTE_ETHER_MIN_LEN ||
+ l2reflect_pkt_bytes > MAX_JUMBO_PKT_LEN)
+ return -1;
+ overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
+ dev_info->max_mtu);
+ conf->rxmode.mtu = MAX(l2reflect_pkt_bytes - overhead_len,
+ dev_info->min_mtu);
+ if (conf->rxmode.mtu > RTE_ETHER_MTU)
+ conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Siemens AG
+ */
+
+#ifndef _L2REFLECT_UTILS_H_
+#define _L2REFLECT_UTILS_H_
+
+#define MAX_REPEAT_TIMES 30
+#define CHECK_INTERVAL 2000
+
+void assert_link_status(int port_id);
+
+uint32_t
+eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu);
+
+int
+config_port_max_pkt_len(struct rte_eth_conf *conf,
+ struct rte_eth_dev_info *dev_info);
+
+#endif /* _L2REFLECT_UTILS_H_ */
@@ -3,6 +3,7 @@
apps = [
'dumpcap',
+ 'l2reflect',
'pdump',
'proc-info',
'test-acl',