@@ -1502,6 +1502,7 @@ F: lib/librte_node/
IF Proxy - EXPERIMENTAL
M: Andrzej Ostruszka <aostruszka@marvell.com>
F: lib/librte_if_proxy/
+F: examples/l3fwd-ifpx/
F: app/test/test_if_proxy.c
F: doc/guides/prog_guide/if_proxy_lib.rst
@@ -84,6 +84,7 @@ else
$(info vm_power_manager requires libvirt >= 0.9.3)
endif
endif
+DIRS-$(CONFIG_RTE_LIBRTE_IF_PROXY) += l3fwd-ifpx
DIRS-y += eventdev_pipeline
new file mode 100644
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Marvell International Ltd.
+
+# binary name
+APP = l3fwd
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c l3fwd.c
+
+# Build using pkg-config variables if possible
+ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)
+
+all: shared
+.PHONY: shared static
+shared: build/$(APP)-shared
+ ln -sf $(APP)-shared build/$(APP)
+static: build/$(APP)-static
+ ln -sf $(APP)-static build/$(APP)
+
+PKGCONF ?= pkg-config
+
+PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null)
+CFLAGS += -DALLOW_EXPERIMENTAL_API -O3 $(shell $(PKGCONF) --cflags libdpdk)
+LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
+LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)
+
+build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+
+build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
+ $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+
+build:
+ @mkdir -p $@
+
+.PHONY: clean
+clean:
+ rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
+ test -d build && rmdir -p build || true
+
+else # Build using legacy build system
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, detect a build directory, by looking for a path with a .config
+RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+CFLAGS += -I$(SRCDIR)
+CFLAGS += -O3 $(USER_FLAGS)
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_if_proxy -lrte_ethdev -lrte_eal
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+endif
new file mode 100644
@@ -0,0 +1,1131 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Marvell International Ltd.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+
+#ifndef USE_HASH_CRC
+#include <rte_jhash.h>
+#else
+#include <rte_hash_crc.h>
+#endif
+
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+#include <rte_if_proxy.h>
+
+#include "l3fwd.h"
+
+#define DO_RFC_1812_CHECKS
+
+#define IPV4_L3FWD_LPM_MAX_RULES 1024
+#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
+#define IPV6_L3FWD_LPM_MAX_RULES 1024
+#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
+
+static volatile bool ifpx_ready;
+
+/* ethernet addresses of ports */
+static
+union lladdr_t port_mac[RTE_MAX_ETHPORTS];
+
+static struct rte_lpm *ipv4_routes;
+static struct rte_lpm6 *ipv6_routes;
+
+static
+struct ipv4_gateway {
+ uint16_t port;
+ union lladdr_t lladdr;
+ uint32_t ip;
+} ipv4_gateways[128];
+
+static
+struct ipv6_gateway {
+ uint16_t port;
+ union lladdr_t lladdr;
+ uint8_t ip[16];
+} ipv6_gateways[128];
+
+/* The lowest 2 bits of next hop (which is 24/21 bit for IPv4/6) are reserved to
+ * encode:
+ * 00 -> host route: higher bits of next hop are port id and dst MAC should be
+ * based on dst IP
+ * 01 -> gateway route: higher bits of next hop are index into gateway array and
+ * use port and MAC cached there (if no MAC cached yet then search for it
+ * based on gateway IP)
+ * 10 -> proxy entry: packet directed to us, just take higher bits as port id of
+ * proxy and send packet there (without any modification)
+ * The port id (16 bits) will always fit however this will not work if you
+ * need more than 2^20 gateways.
+ */
+enum route_type {
+ HOST_ROUTE = 0x00,
+ GW_ROUTE = 0x01,
+ PROXY_ADDR = 0x02,
+};
+
+RTE_STD_C11
+_Static_assert(RTE_DIM(ipv4_gateways) <= (1 << 22) &&
+ RTE_DIM(ipv6_gateways) <= (1 << 19),
+ "Gateway array index has to fit within next_hop with 2 bits reserved");
+
+static
+uint32_t find_add_gateway(uint16_t port, uint32_t ip)
+{
+	uint32_t i, idx = -1U; /* idx: first free slot found, -1U if table full */
+
+	for (i = 0; i < RTE_DIM(ipv4_gateways); ++i) {
+		/* Remember first free slot in case GW is not present. */
+		if (idx == -1U && ipv4_gateways[i].ip == 0)
+			idx = i;
+		else if (ipv4_gateways[i].ip == ip)
+			/* For now assume that given GW will be always at the
+			 * same port, so no checking for that
+			 */
+			return i;
+	}
+	if (idx != -1U) { /* claim the free slot for this new gateway */
+		ipv4_gateways[idx].port = port;
+		ipv4_gateways[idx].ip = ip;
+		/* Since ARP tables are kept per lcore MAC will be updated
+		 * during first lookup.
+		 */
+	}
+	return idx; /* gateway array index, or -1U when no slot is available */
+}
+
+static
+void clear_gateway(uint32_t ip)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_DIM(ipv4_gateways); ++i) {
+ if (ipv4_gateways[i].ip == ip) {
+ ipv4_gateways[i].ip = 0;
+ ipv4_gateways[i].lladdr.val = 0;
+ ipv4_gateways[i].port = RTE_MAX_ETHPORTS;
+ break;
+ }
+ }
+}
+
+static
+uint32_t find_add_gateway6(uint16_t port, const uint8_t *ip)
+{
+	uint32_t i, idx = -1U; /* idx: first free slot found, -1U if table full */
+
+	for (i = 0; i < RTE_DIM(ipv6_gateways); ++i) {
+		/* Remember first free slot in case GW is not present. */
+		if (idx == -1U && ipv6_gateways[i].ip[0] == 0)
+			idx = i;
+		else if (memcmp(ipv6_gateways[i].ip, ip, 16) == 0)
+			/* For now assume that given GW will be always at the
+			 * same port, so no checking for that
+			 */
+			return i;
+	}
+	if (idx != -1U) {
+		ipv6_gateways[idx].port = port;
+		memcpy(ipv6_gateways[idx].ip, ip, 16);
+		/* Since ARP tables are kept per lcore MAC will be updated
+		 * during first lookup.
+		 */
+	}
+	return idx; /* gateway array index, or -1U when no slot is available */
+}
+
+static
+void clear_gateway6(const uint8_t *ip)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_DIM(ipv6_gateways); ++i) {
+ if (memcmp(ipv6_gateways[i].ip, ip, 16) == 0) {
+ memset(&ipv6_gateways[i].ip, 0, 16);
+ ipv6_gateways[i].lladdr.val = 0;
+ ipv6_gateways[i].port = RTE_MAX_ETHPORTS;
+ break;
+ }
+ }
+}
+
+/* Assumptions:
+ * - Link related changes (MAC/MTU/...) need to be executed once, and it's OK
+ * to run them from the callback - if this is not the case (e.g. -EBUSY for
+ * MTU change, then event notification need to be used and more sophisticated
+ * coordination with lcore loops and stopping/starting of the ports: for
+ * example lcores not receiving on this port just mark it as inactive and stop
+ * transmitting to it and the one with RX stops the port sets the MAC starts
+ * it and notifies other lcores that it is back).
+ * - LPM is safe to be modified by one writer, and read by many without any
+ * locks (it looks to me like this is the case), however upon routing change
+ * there might be a transient period during which packets are not directed
+ * according to new rule.
+ * - Hash is unsafe to be used that way (and I don't want to turn on relevant
+ * flags just to excersize queued notifications) so every lcore keeps its
+ * copy of relevant data.
+ * Therefore there are callbacks defined for the routing info/address changes
+ * and remaining ones are handled via events on per lcore basis.
+ */
+static
+int mac_change(const struct rte_ifpx_mac_change *ev)
+{
+ int i;
+ struct rte_ether_addr mac_addr;
+ char buf[RTE_ETHER_ADDR_FMT_SIZE];
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ rte_ether_format_addr(buf, sizeof(buf), &ev->mac);
+ RTE_LOG(DEBUG, L3FWD, "MAC change for port %d: %s\n",
+ ev->port_id, buf);
+ }
+ /* NOTE - use copy because RTE functions don't take const args */
+ rte_ether_addr_copy(&ev->mac, &mac_addr);
+ i = rte_eth_dev_default_mac_addr_set(ev->port_id, &mac_addr);
+ if (i == -EOPNOTSUPP)
+ i = rte_eth_dev_mac_addr_add(ev->port_id, &mac_addr, 0);
+ if (i < 0)
+ RTE_LOG(WARNING, L3FWD, "Failed to set MAC address\n");
+ else {
+ port_mac[ev->port_id].mac.addr = ev->mac;
+ port_mac[ev->port_id].mac.valid = 1;
+ }
+ return 1;
+}
+
+static
+int link_change(const struct rte_ifpx_link_change *ev)
+{
+ uint16_t proxy_id = rte_ifpx_proxy_get(ev->port_id);
+ uint32_t mask;
+
+ /* Mark the proxy too since we get only port notifications. */
+ mask = 1U << ev->port_id | 1U << proxy_id;
+
+ RTE_LOG(DEBUG, L3FWD, "Link change for port %d: %d\n",
+ ev->port_id, ev->is_up);
+ if (ev->is_up) {
+ rte_eth_dev_set_link_up(ev->port_id);
+ active_port_mask |= mask;
+ } else {
+ rte_eth_dev_set_link_down(ev->port_id);
+ active_port_mask &= ~mask;
+ }
+ active_port_mask &= enabled_port_mask;
+ return 1;
+}
+
+static
+int addr_add(const struct rte_ifpx_addr_change *ev)
+{
+ char buf[INET_ADDRSTRLEN];
+ uint32_t ip;
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ ip = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv4 address for port %d: %s\n",
+ ev->port_id, buf);
+ }
+ rte_lpm_add(ipv4_routes, ev->ip, 32,
+ ev->port_id << 2 | PROXY_ADDR);
+ return 1;
+}
+
+static
+int route_add(const struct rte_ifpx_route_change *ev)
+{
+ char buf[INET_ADDRSTRLEN];
+ uint32_t nh, ip;
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ ip = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv4 route for port %d: %s/%d\n",
+ ev->port_id, buf, ev->depth);
+ }
+
+ /* On Linux upon changing of the IP we get notification for both addr
+ * and route, so just check if we already have addr entry and if so
+ * then ignore this notification.
+ */
+ if (ev->depth == 32 &&
+ rte_lpm_lookup(ipv4_routes, ev->ip, &nh) == 0 && nh & PROXY_ADDR)
+ return 1;
+
+ if (ev->gateway) {
+ nh = find_add_gateway(ev->port_id, ev->gateway);
+ if (nh != -1U)
+ rte_lpm_add(ipv4_routes, ev->ip, ev->depth,
+ nh << 2 | GW_ROUTE);
+ else
+ RTE_LOG(WARNING, L3FWD, "No free slot in GW array\n");
+ } else
+ rte_lpm_add(ipv4_routes, ev->ip, ev->depth,
+ ev->port_id << 2 | HOST_ROUTE);
+ return 1;
+}
+
+static
+int addr_del(const struct rte_ifpx_addr_change *ev)
+{
+ char buf[INET_ADDRSTRLEN];
+ uint32_t ip;
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ ip = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv4 address removed from port %d: %s\n",
+ ev->port_id, buf);
+ }
+ rte_lpm_delete(ipv4_routes, ev->ip, 32);
+ return 1;
+}
+
+static
+int route_del(const struct rte_ifpx_route_change *ev)
+{
+ char buf[INET_ADDRSTRLEN];
+ uint32_t ip;
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ ip = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv4 route removed from port %d: %s/%d\n",
+ ev->port_id, buf, ev->depth);
+ }
+ if (ev->gateway)
+ clear_gateway(ev->gateway);
+ rte_lpm_delete(ipv4_routes, ev->ip, ev->depth);
+ return 1;
+}
+
+static
+int addr6_add(const struct rte_ifpx_addr6_change *ev)
+{
+ char buf[INET6_ADDRSTRLEN];
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ inet_ntop(AF_INET6, ev->ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv6 address for port %d: %s\n",
+ ev->port_id, buf);
+ }
+ rte_lpm6_add(ipv6_routes, ev->ip, 128,
+ ev->port_id << 2 | PROXY_ADDR);
+ return 1;
+}
+
+static
+int route6_add(const struct rte_ifpx_route6_change *ev)
+{
+	char buf[INET6_ADDRSTRLEN];
+
+	/* See comment in route_add(). */
+	uint32_t nh;
+	if (ev->depth == 128 &&
+	    rte_lpm6_lookup(ipv6_routes, ev->ip, &nh) == 0 && nh & PROXY_ADDR)
+		return 1;
+
+	if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+		inet_ntop(AF_INET6, ev->ip, buf, sizeof(buf));
+		RTE_LOG(DEBUG, L3FWD, "IPv6 route for port %d: %s/%d\n",
+			ev->port_id, buf, ev->depth);
+	}
+	/* no valid IPv6 address starts with 0x00 */
+	if (ev->gateway[0]) {
+		/* GW cache is keyed by the gateway address, not the prefix. */
+		nh = find_add_gateway6(ev->port_id, ev->gateway);
+		if (nh != -1U)
+			rte_lpm6_add(ipv6_routes, ev->ip, ev->depth,
+				     nh << 2 | GW_ROUTE);
+		else
+			RTE_LOG(WARNING, L3FWD, "No free slot in GW6 array\n");
+	} else
+		rte_lpm6_add(ipv6_routes, ev->ip, ev->depth,
+			     ev->port_id << 2 | HOST_ROUTE);
+	return 1;
+}
+
+static
+int addr6_del(const struct rte_ifpx_addr6_change *ev)
+{
+ char buf[INET6_ADDRSTRLEN];
+
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ inet_ntop(AF_INET6, ev->ip, buf, sizeof(buf));
+ RTE_LOG(DEBUG, L3FWD, "IPv6 address removed from port %d: %s\n",
+ ev->port_id, buf);
+ }
+ rte_lpm6_delete(ipv6_routes, ev->ip, 128);
+ return 1;
+}
+
+static
+int route6_del(const struct rte_ifpx_route6_change *ev)
+{
+	char buf[INET6_ADDRSTRLEN]; /* AF_INET6 needs the larger buffer */
+
+	if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+		inet_ntop(AF_INET6, ev->ip, buf, sizeof(buf));
+		RTE_LOG(DEBUG, L3FWD, "IPv6 route removed from port %d: %s/%d\n",
+			ev->port_id, buf, ev->depth);
+	}
+	if (ev->gateway[0])
+		clear_gateway6(ev->gateway);
+	rte_lpm6_delete(ipv6_routes, ev->ip, ev->depth);
+	return 1;
+}
+
+static
+int cfg_done(void)
+{
+ uint16_t port_id, px;
+ const struct rte_ifpx_info *pinfo;
+
+ RTE_LOG(DEBUG, L3FWD, "Proxy config finished\n");
+
+ /* Copy MAC addresses of the proxies - to be used as src MAC during
+ * forwarding.
+ */
+ RTE_ETH_FOREACH_DEV(port_id) {
+ px = rte_ifpx_proxy_get(port_id);
+ if (px != RTE_MAX_ETHPORTS && px != port_id) {
+ pinfo = rte_ifpx_info_get(px);
+ rte_ether_addr_copy(&pinfo->mac,
+ &port_mac[port_id].mac.addr);
+ port_mac[port_id].mac.valid = 1;
+ }
+ }
+
+ ifpx_ready = 1;
+ return 1;
+}
+
+static
+struct rte_ifpx_callbacks ifpx_callbacks = {
+ .mac_change = mac_change,
+#if 0
+ .mtu_change = mtu_change,
+#endif
+ .link_change = link_change,
+ .addr_add = addr_add,
+ .addr_del = addr_del,
+ .addr6_add = addr6_add,
+ .addr6_del = addr6_del,
+ .route_add = route_add,
+ .route_del = route_del,
+ .route6_add = route6_add,
+ .route6_del = route6_del,
+ .cfg_done = cfg_done,
+};
+
+int init_if_proxy(void)
+{
+ char buf[16];
+ unsigned int i;
+
+ rte_ifpx_callbacks_register(&ifpx_callbacks);
+
+ RTE_LCORE_FOREACH(i) {
+ if (lcore_conf[i].n_rx_queue == 0)
+ continue;
+ snprintf(buf, sizeof(buf), "IFPX-events_%d", i);
+ lcore_conf[i].ev_queue = rte_ring_create(buf, 16, SOCKET_ID_ANY,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (!lcore_conf[i].ev_queue) {
+ RTE_LOG(ERR, L3FWD,
+ "Failed to create event queue for lcore %d\n",
+ i);
+ return -1;
+ }
+ rte_ifpx_queue_add(lcore_conf[i].ev_queue);
+ }
+
+ return rte_ifpx_listen();
+}
+
+void close_if_proxy(void)
+{
+ unsigned int i;
+
+ RTE_LCORE_FOREACH(i) {
+ if (lcore_conf[i].n_rx_queue == 0)
+ continue;
+ rte_ring_free(lcore_conf[i].ev_queue);
+ }
+ rte_ifpx_close();
+}
+
+void wait_for_config_done(void)
+{
+ while (!ifpx_ready)
+ rte_delay_ms(100);
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline
+int is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct rte_ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif
+
+/* Send burst of packets on an output interface */
+static inline
+int send_burst(struct lcore_conf *lconf, uint16_t n, uint16_t port)
+{
+ struct rte_mbuf **m_table;
+ int ret;
+ uint16_t queueid;
+
+ queueid = lconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)lconf->tx_mbufs[port].m_table;
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline
+int send_single_packet(struct lcore_conf *lconf,
+ struct rte_mbuf *m, uint16_t port)
+{
+ uint16_t len;
+
+ len = lconf->tx_mbufs[port].len;
+ lconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ send_burst(lconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ lconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+static inline
+int ipv4_get_destination(const struct rte_ipv4_hdr *ipv4_hdr,
+ struct rte_lpm *lpm, uint32_t *next_hop)
+{
+ return rte_lpm_lookup(lpm,
+ rte_be_to_cpu_32(ipv4_hdr->dst_addr),
+ next_hop);
+}
+
+static inline
+int ipv6_get_destination(const struct rte_ipv6_hdr *ipv6_hdr,
+ struct rte_lpm6 *lpm, uint32_t *next_hop)
+{
+ return rte_lpm6_lookup(lpm, ipv6_hdr->dst_addr, next_hop);
+}
+
+static
+uint16_t ipv4_process_pkt(struct lcore_conf *lconf,
+			  struct rte_ether_hdr *eth_hdr,
+			  struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid)
+{
+	union lladdr_t lladdr = { 0 };
+	int i, g;
+	uint32_t ip, nh;
+
+	/* Here we know that packet is not from proxy - this case is handled
+	 * in the main loop - so if we fail to find destination we will direct
+	 * it to the proxy.
+	 */
+	if (ipv4_get_destination(ipv4_hdr, ipv4_routes, &nh) < 0)
+		return rte_ifpx_proxy_get(portid);
+
+	if (nh & PROXY_ADDR)
+		return nh >> 2;
+
+	/* Packet not to us so update src/dst MAC. */
+	if (nh & GW_ROUTE) {
+		/* Keep gateway index (g) separate from the neighbour map
+		 * index (i) - one variable for both would cache the MAC in
+		 * and read the port from the wrong GW slot.
+		 */
+		g = nh >> 2;
+		if (ipv4_gateways[g].lladdr.mac.valid)
+			lladdr = ipv4_gateways[g].lladdr;
+		else {
+			i = rte_hash_lookup(lconf->neigh_hash,
+					    &ipv4_gateways[g].ip);
+			if (i < 0)
+				return rte_ifpx_proxy_get(portid);
+			lladdr = lconf->neigh_map[i];
+			ipv4_gateways[g].lladdr = lladdr;
+		}
+		nh = ipv4_gateways[g].port;
+	} else {
+		nh >>= 2;
+		ip = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+		i = rte_hash_lookup(lconf->neigh_hash, &ip);
+		if (i < 0)
+			return rte_ifpx_proxy_get(portid);
+		lladdr = lconf->neigh_map[i];
+	}
+
+	RTE_ASSERT(lladdr.mac.valid);
+	RTE_ASSERT(port_mac[nh].mac.valid);
+	/* dst addr (8B write: 6B MAC + 2B overwritten by s_addr below) */
+	*(uint64_t *)&eth_hdr->d_addr = lladdr.val;
+	/* src addr */
+	rte_ether_addr_copy(&port_mac[nh].mac.addr, &eth_hdr->s_addr);
+
+	return nh;
+}
+
+static
+uint16_t ipv6_process_pkt(struct lcore_conf *lconf,
+			  struct rte_ether_hdr *eth_hdr,
+			  struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid)
+{
+	union lladdr_t lladdr = { 0 };
+	int i, g;
+	uint32_t nh;
+
+	/* Here we know that packet is not from proxy - this case is handled
+	 * in the main loop - so if we fail to find destination we will direct
+	 * it to the proxy.
+	 */
+	if (ipv6_get_destination(ipv6_hdr, ipv6_routes, &nh) < 0)
+		return rte_ifpx_proxy_get(portid);
+
+	if (nh & PROXY_ADDR)
+		return nh >> 2;
+
+	/* Packet not to us so update src/dst MAC. */
+	if (nh & GW_ROUTE) {
+		/* Keep gateway index (g) separate from the neighbour map
+		 * index (i) - one variable for both would cache the MAC in
+		 * and read the port from the wrong GW slot.
+		 */
+		g = nh >> 2;
+		if (ipv6_gateways[g].lladdr.mac.valid)
+			lladdr = ipv6_gateways[g].lladdr;
+		else {
+			i = rte_hash_lookup(lconf->neigh6_hash,
+					    ipv6_gateways[g].ip);
+			if (i < 0)
+				return rte_ifpx_proxy_get(portid);
+			lladdr = lconf->neigh6_map[i];
+			ipv6_gateways[g].lladdr = lladdr;
+		}
+		nh = ipv6_gateways[g].port;
+	} else {
+		nh >>= 2;
+		i = rte_hash_lookup(lconf->neigh6_hash, ipv6_hdr->dst_addr);
+		if (i < 0)
+			return rte_ifpx_proxy_get(portid);
+		lladdr = lconf->neigh6_map[i];
+	}
+
+	RTE_ASSERT(lladdr.mac.valid);
+	/* dst addr (8B write: 6B MAC + 2B overwritten by s_addr below) */
+	*(uint64_t *)&eth_hdr->d_addr = lladdr.val;
+	/* src addr */
+	rte_ether_addr_copy(&port_mac[nh].mac.addr, &eth_hdr->s_addr);
+
+	return nh;
+}
+
+static __rte_always_inline
+void l3fwd_lpm_simple_forward(struct rte_mbuf *m, uint16_t portid,
+			      struct lcore_conf *lconf)
+{
+	struct rte_ether_hdr *eth_hdr;
+	uint32_t nh;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+
+	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+		/* Handle IPv4 headers.*/
+		struct rte_ipv4_hdr *ipv4_hdr;
+
+		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+						   sizeof(*eth_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+		/* Check to make sure the packet is valid (RFC1812) */
+		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+			rte_pktmbuf_free(m);
+			return;
+		}
+#endif
+		nh = ipv4_process_pkt(lconf, eth_hdr, ipv4_hdr, portid);
+
+#ifdef DO_RFC_1812_CHECKS
+		/* Update time to live and header checksum */
+		--(ipv4_hdr->time_to_live);
+		++(ipv4_hdr->hdr_checksum); /* incremental update (RFC 1141) for TTL-1 */
+#endif
+	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+		/* Handle IPv6 headers.*/
+		struct rte_ipv6_hdr *ipv6_hdr;
+
+		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+						   sizeof(*eth_hdr));
+
+		nh = ipv6_process_pkt(lconf, eth_hdr, ipv6_hdr, portid);
+	} else
+		/* Unhandled protocol */
+		nh = rte_ifpx_proxy_get(portid); /* punt unknown traffic to the proxy */
+
+	if (nh >= RTE_MAX_ETHPORTS || (active_port_mask & 1 << nh) == 0)
+		rte_pktmbuf_free(m); /* no usable egress port: drop */
+	else
+		send_single_packet(lconf, m, nh);
+}
+
+static inline
+void l3fwd_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint16_t portid, struct lcore_conf *lconf)
+{
+ int32_t j;
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
+
+ /* Prefetch and forward already prefetched packets. */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_lpm_simple_forward(pkts_burst[j], portid, lconf);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++)
+ l3fwd_lpm_simple_forward(pkts_burst[j], portid, lconf);
+}
+
+static
+void handle_neigh_add(struct lcore_conf *lconf,
+ const struct rte_ifpx_neigh_change *ev)
+{
+ char mac[RTE_ETHER_ADDR_FMT_SIZE];
+ char ip[INET_ADDRSTRLEN];
+ int32_t i, a;
+
+ i = rte_hash_add_key(lconf->neigh_hash, &ev->ip);
+ if (i < 0) {
+ RTE_LOG(WARNING, L3FWD, "Failed to add IPv4 neighbour entry\n");
+ return;
+ }
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ rte_ether_format_addr(mac, sizeof(mac), &ev->mac);
+ a = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &a, ip, sizeof(ip));
+ RTE_LOG(DEBUG, L3FWD, "Neighbour update for port %d: %s -> %s@%d\n",
+ ev->port_id, ip, mac, i);
+ }
+ lconf->neigh_map[i].mac.addr = ev->mac;
+ lconf->neigh_map[i].mac.valid = 1;
+}
+
+static
+void handle_neigh_del(struct lcore_conf *lconf,
+ const struct rte_ifpx_neigh_change *ev)
+{
+ char ip[INET_ADDRSTRLEN];
+ int32_t i, a;
+
+ i = rte_hash_del_key(lconf->neigh_hash, &ev->ip);
+ if (i < 0) {
+ RTE_LOG(WARNING, L3FWD,
+ "Failed to remove IPv4 neighbour entry\n");
+ return;
+ }
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ a = rte_cpu_to_be_32(ev->ip);
+ inet_ntop(AF_INET, &a, ip, sizeof(ip));
+ RTE_LOG(DEBUG, L3FWD, "Neighbour removal for port %d: %s\n",
+ ev->port_id, ip);
+ }
+ lconf->neigh_map[i].val = 0;
+}
+
+static
+void handle_neigh6_add(struct lcore_conf *lconf,
+		       const struct rte_ifpx_neigh6_change *ev)
+{
+	char mac[RTE_ETHER_ADDR_FMT_SIZE];
+	char ip[INET6_ADDRSTRLEN];
+	int32_t i;
+
+	i = rte_hash_add_key(lconf->neigh6_hash, ev->ip);
+	if (i < 0) {
+		RTE_LOG(WARNING, L3FWD, "Failed to add IPv6 neighbour entry\n");
+		return;
+	}
+	if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+		rte_ether_format_addr(mac, sizeof(mac), &ev->mac);
+		inet_ntop(AF_INET6, ev->ip, ip, sizeof(ip));
+		RTE_LOG(DEBUG, L3FWD, "Neighbour update for port %d: %s -> %s@%d\n",
+			ev->port_id, ip, mac, i);
+	}
+	lconf->neigh6_map[i].mac.addr = ev->mac;
+	lconf->neigh6_map[i].mac.valid = 1;
+}
+
+static
+void handle_neigh6_del(struct lcore_conf *lconf,
+ const struct rte_ifpx_neigh6_change *ev)
+{
+ char ip[INET6_ADDRSTRLEN];
+ int32_t i;
+
+ i = rte_hash_del_key(lconf->neigh6_hash, ev->ip);
+ if (i < 0) {
+ RTE_LOG(WARNING, L3FWD, "Failed to remove IPv6 neighbour entry\n");
+ return;
+ }
+ if (rte_log_get_level(RTE_LOGTYPE_L3FWD) >= (int)RTE_LOG_DEBUG) {
+ inet_ntop(AF_INET6, ev->ip, ip, sizeof(ip));
+ RTE_LOG(DEBUG, L3FWD, "Neighbour removal for port %d: %s\n",
+ ev->port_id, ip);
+ }
+ lconf->neigh6_map[i].val = 0;
+}
+
+static
+void handle_events(struct lcore_conf *lconf)
+{
+ struct rte_ifpx_event *ev;
+
+ while (rte_ring_dequeue(lconf->ev_queue, (void **)&ev) == 0) {
+ switch (ev->type) {
+ case RTE_IFPX_NEIGH_ADD:
+ handle_neigh_add(lconf, &ev->neigh_change);
+ break;
+ case RTE_IFPX_NEIGH_DEL:
+ handle_neigh_del(lconf, &ev->neigh_change);
+ break;
+ case RTE_IFPX_NEIGH6_ADD:
+ handle_neigh6_add(lconf, &ev->neigh6_change);
+ break;
+ case RTE_IFPX_NEIGH6_DEL:
+ handle_neigh6_del(lconf, &ev->neigh6_change);
+ break;
+ default:
+ RTE_LOG(WARNING, L3FWD,
+ "Unexpected event: %d\n", ev->type);
+ }
+ free(ev);
+ }
+}
+
+void setup_lpm(void)
+{
+ struct rte_lpm6_config cfg6;
+ struct rte_lpm_config cfg4;
+
+ /* create the LPM table */
+ cfg4.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+ cfg4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S;
+ cfg4.flags = 0;
+ ipv4_routes = rte_lpm_create("IPV4_L3FWD_LPM", SOCKET_ID_ANY, &cfg4);
+ if (ipv4_routes == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table\n");
+
+ /* create the LPM6 table */
+ cfg6.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
+ cfg6.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
+ cfg6.flags = 0;
+ ipv6_routes = rte_lpm6_create("IPV6_L3FWD_LPM", SOCKET_ID_ANY, &cfg6);
+ if (ipv6_routes == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table\n");
+}
+
+static
+uint32_t hash_ipv4(const void *key, uint32_t key_len __rte_unused,
+ uint32_t init_val)
+{
+#ifndef USE_HASH_CRC
+ return rte_jhash_1word(*(const uint32_t *)key, init_val);
+#else
+ return rte_hash_crc_4byte(*(const uint32_t *)key, init_val);
+#endif
+}
+
+static
+uint32_t hash_ipv6(const void *key, uint32_t key_len __rte_unused,
+ uint32_t init_val)
+{
+#ifndef USE_HASH_CRC
+ return rte_jhash_32b(key, 4, init_val);
+#else
+ const uint64_t *pk = key;
+ init_val = rte_hash_crc_8byte(*pk, init_val);
+ return rte_hash_crc_8byte(*(pk+1), init_val);
+#endif
+}
+
+static
+int setup_neigh(struct lcore_conf *lconf)
+{
+ char buf[16];
+ struct rte_hash_parameters ipv4_hparams = {
+ .name = buf,
+ .entries = L3FWD_NEIGH_ENTRIES,
+ .key_len = 4,
+ .hash_func = hash_ipv4,
+ .hash_func_init_val = 0,
+ };
+ struct rte_hash_parameters ipv6_hparams = {
+ .name = buf,
+ .entries = L3FWD_NEIGH_ENTRIES,
+ .key_len = 16,
+ .hash_func = hash_ipv6,
+ .hash_func_init_val = 0,
+ };
+
+ snprintf(buf, sizeof(buf), "neigh_hash-%d", rte_lcore_id());
+ lconf->neigh_hash = rte_hash_create(&ipv4_hparams);
+ snprintf(buf, sizeof(buf), "neigh_map-%d", rte_lcore_id());
+ lconf->neigh_map = rte_zmalloc(buf,
+ L3FWD_NEIGH_ENTRIES*sizeof(*lconf->neigh_map),
+ 8);
+ if (lconf->neigh_hash == NULL || lconf->neigh_map == NULL) {
+ RTE_LOG(ERR, L3FWD,
+ "Unable to create the l3fwd ARP/IPv4 table (lcore %d)\n",
+ rte_lcore_id());
+ return -1;
+ }
+
+ snprintf(buf, sizeof(buf), "neigh6_hash-%d", rte_lcore_id());
+ lconf->neigh6_hash = rte_hash_create(&ipv6_hparams);
+ snprintf(buf, sizeof(buf), "neigh6_map-%d", rte_lcore_id());
+ lconf->neigh6_map = rte_zmalloc(buf,
+ L3FWD_NEIGH_ENTRIES*sizeof(*lconf->neigh6_map),
+ 8);
+ if (lconf->neigh6_hash == NULL || lconf->neigh6_map == NULL) {
+ RTE_LOG(ERR, L3FWD,
+ "Unable to create the l3fwd ARP/IPv6 table (lcore %d)\n",
+ rte_lcore_id());
+ return -1;
+ }
+ return 0;
+}
+
+int lpm_check_ptype(int portid)
+{
+ int i, ret;
+ int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
+ uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
+ if (ret <= 0)
+ return 0;
+
+ uint32_t ptypes[ret];
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
+ for (i = 0; i < ret; ++i) {
+ if (ptypes[i] & RTE_PTYPE_L3_IPV4)
+ ptype_l3_ipv4 = 1;
+ if (ptypes[i] & RTE_PTYPE_L3_IPV6)
+ ptype_l3_ipv6 = 1;
+ }
+
+ if (ptype_l3_ipv4 == 0)
+ RTE_LOG(WARNING, L3FWD,
+ "port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
+
+ if (ptype_l3_ipv6 == 0)
+ RTE_LOG(WARNING, L3FWD,
+ "port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
+
+ if (ptype_l3_ipv4 && ptype_l3_ipv6)
+ return 1;
+
+ return 0;
+
+}
+
+static inline
+void lpm_parse_ptype(struct rte_mbuf *m)
+{
+ struct rte_ether_hdr *eth_hdr;
+ uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+ uint16_t ether_type;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ ether_type = eth_hdr->ether_type;
+ if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
+ packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6))
+ packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+
+ m->packet_type = packet_type;
+}
+
+uint16_t lpm_cb_parse_ptype(uint16_t port __rte_unused,
+ uint16_t queue __rte_unused,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts,
+ uint16_t max_pkts __rte_unused,
+ void *user_param __rte_unused)
+{
+ unsigned int i;
+
+ if (unlikely(nb_pkts == 0))
+ return nb_pkts;
+ rte_prefetch0(rte_pktmbuf_mtod(pkts[0], struct ether_hdr *));
+ for (i = 0; i < (unsigned int) (nb_pkts - 1); ++i) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1],
+ struct ether_hdr *));
+ lpm_parse_ptype(pkts[i]);
+ }
+ lpm_parse_ptype(pkts[i]);
+
+ return nb_pkts;
+}
+
+/* main processing loop */
+int lpm_main_loop(void *dummy __rte_unused)
+{
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	unsigned int lcore_id;
+	uint64_t prev_tsc, diff_tsc, cur_tsc;
+	int i, j, nb_rx;
+	uint16_t portid;
+	uint8_t queueid;
+	struct lcore_conf *lconf;
+	struct lcore_rx_queue *rxq;
+	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+				   US_PER_S * BURST_TX_DRAIN_US;
+
+	prev_tsc = 0;
+
+	lcore_id = rte_lcore_id();
+	lconf = &lcore_conf[lcore_id];
+
+	if (setup_neigh(lconf) < 0) {
+		RTE_LOG(ERR, L3FWD, "lcore %u failed to setup its ARP tables\n",
+			lcore_id);
+		return 0;
+	}
+
+	if (lconf->n_rx_queue == 0) {
+		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+		return 0;
+	}
+
+	RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
+
+	for (i = 0; i < lconf->n_rx_queue; i++) {
+
+		portid = lconf->rx_queue_list[i].port_id;
+		queueid = lconf->rx_queue_list[i].queue_id;
+		RTE_LOG(INFO, L3FWD,
+			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
+			lcore_id, portid, queueid);
+	}
+
+	while (!force_quit) {
+
+		cur_tsc = rte_rdtsc();
+		/*
+		 * TX burst and event queue drain
+		 */
+		diff_tsc = cur_tsc - prev_tsc;
+		if (unlikely(diff_tsc > drain_tsc)) {
+
+			for (i = 0; i < lconf->n_tx_port; ++i) {
+				portid = lconf->tx_port_id[i];
+				if (lconf->tx_mbufs[portid].len == 0)
+					continue;
+				send_burst(lconf,
+					   lconf->tx_mbufs[portid].len,
+					   portid);
+				lconf->tx_mbufs[portid].len = 0;
+			}
+
+			/* Events are checked (and prev_tsc reset) only every
+			 * EV_QUEUE_DRAIN TX-drain periods.
+			 */
+			if (diff_tsc > EV_QUEUE_DRAIN * drain_tsc) {
+				if (lconf->ev_queue &&
+				    !rte_ring_empty(lconf->ev_queue))
+					handle_events(lconf);
+				prev_tsc = cur_tsc;
+			}
+		}
+
+		/*
+		 * Read packet from RX queues
+		 */
+		for (i = 0; i < lconf->n_rx_queue; ++i) {
+			rxq = &lconf->rx_queue_list[i];
+			portid = rxq->port_id;
+			queueid = rxq->queue_id;
+			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+						 MAX_PKT_BURST);
+			if (nb_rx == 0)
+				continue;
+			/* If current queue is from proxy interface then there
+			 * is no need to figure out destination port - just
+			 * forward it to the bound port.
+			 */
+			if (unlikely(rxq->dst_port != RTE_MAX_ETHPORTS)) {
+				for (j = 0; j < nb_rx; ++j)
+					send_single_packet(lconf, pkts_burst[j],
+							   rxq->dst_port);
+			} else
+				l3fwd_send_packets(nb_rx, pkts_burst, portid,
+						   lconf);
+		}
+	}
+
+	return 0;
+}
new file mode 100644
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Marvell International Ltd.
+ */
+
+#ifndef __L3_FWD_H__
+#define __L3_FWD_H__
+
+#include <stdbool.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_hash.h>
+
+#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+#define EV_QUEUE_DRAIN 5 /* Check event queue every 5 TX drains */
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+
+/*
+ * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
+ */
+#define MAX_TX_BURST (MAX_PKT_BURST / 2)
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/* Hash parameters. */
+#ifdef RTE_ARCH_64
+/* default to 4 million hash entries (approx) */
+#define L3FWD_HASH_ENTRIES (1024*1024*4)
+#else
+/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
+#define L3FWD_HASH_ENTRIES (1024*1024*1)
+#endif
+#define HASH_ENTRY_NUMBER_DEFAULT 4
+/* Default ARP table size */
+#define L3FWD_NEIGH_ENTRIES 1024
+
+/* Link-layer address packed together with a validity flag so the whole
+ * entry fits in (and can be copied as) a single 64-bit value.
+ */
+union lladdr_t {
+	uint64_t val;
+	struct {
+		struct rte_ether_addr addr;	/* 6-byte MAC address */
+		uint16_t valid;	/* presumably non-zero once addr is resolved
+				 * - confirm against setup_neigh() users */
+	} mac;
+};
+
+/* Per-port TX staging buffer: packets accumulate here and are flushed as
+ * a burst (by the main loop's periodic drain or when the table fills).
+ */
+struct mbuf_table {
+	uint16_t len;	/* number of valid entries in m_table */
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+/* One rx queue polled by an lcore.  For queues that belong to a proxy
+ * interface, dst_port holds the bound physical port and packets are
+ * forwarded there directly; otherwise dst_port is RTE_MAX_ETHPORTS and
+ * packets go through the normal routing lookup.
+ */
+struct lcore_rx_queue {
+	uint16_t port_id;
+	uint16_t dst_port;
+	uint8_t queue_id;
+} __rte_cache_aligned;
+
+/* Per-lcore forwarding state: the rx queues it polls, one tx queue per
+ * port, per-port tx staging buffers, and its private copies of the
+ * neighbour (ARP/NDP) tables.
+ */
+struct lcore_conf {
+	uint16_t n_rx_queue;
+	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+	uint16_t n_tx_port;
+	uint16_t tx_port_id[RTE_MAX_ETHPORTS];
+	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+	/* Configuration-event ring, drained periodically by the main loop. */
+	struct rte_ring *ev_queue;
+	/* IPv4 neighbour table (map + hash); filled by setup_neigh(). */
+	union lladdr_t *neigh_map;
+	struct rte_hash *neigh_hash;
+	/* IPv6 neighbour table (map + hash). */
+	union lladdr_t *neigh6_map;
+	struct rte_hash *neigh6_hash;
+} __rte_cache_aligned;
+
+/* Set by the signal handler to request a graceful shutdown. */
+extern volatile bool force_quit;
+
+/* mask of enabled/active ports */
+extern uint32_t enabled_port_mask;
+extern uint32_t active_port_mask;
+
+extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+/* IF Proxy registration / teardown. */
+int init_if_proxy(void);
+void close_if_proxy(void);
+
+void wait_for_config_done(void);
+
+/* LPM lookup-table setup and packet-type handling. */
+void setup_lpm(void);
+
+int lpm_check_ptype(int portid);
+
+uint16_t
+lpm_cb_parse_ptype(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[],
+		   uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
+
+/* Use DPDK's __rte_unused (rte_common.h) rather than the raw GCC
+ * attribute, per DPDK coding conventions.
+ */
+int lpm_main_loop(__rte_unused void *dummy);
+
+#endif /* __L3_FWD_H__ */
new file mode 100644
@@ -0,0 +1,740 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Marvell International Ltd.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include <rte_byteorder.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_if_proxy.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "l3fwd.h"
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 1024
+
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_LCORE_PARAMS 1024
+
+/* Static global variables used within this file. */
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/** Ports set in promiscuous mode off by default. */
+static int promiscuous_on;
+
+/* Global variables. */
+
+/* Parse packet type in software via an rx callback; off by default. */
+static int parse_ptype;
+
+volatile bool force_quit;
+
+/* mask of enabled/active ports */
+uint32_t enabled_port_mask;
+uint32_t active_port_mask;
+
+struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+/* One (port, queue, lcore) binding as given on the command line. */
+struct lcore_params {
+	uint16_t port_id;
+	uint8_t queue_id;
+	uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params[MAX_LCORE_PARAMS];
+/* Mapping used when no --config option is supplied. */
+static struct lcore_params lcore_params_default[] = {
+	{0, 0, 2},
+	{0, 1, 2},
+	{0, 2, 2},
+	{1, 0, 2},
+	{1, 1, 2},
+	{1, 2, 2},
+	{2, 0, 2},
+	{3, 0, 3},
+	{3, 1, 3},
+};
+
+static uint16_t nb_lcore_params;
+
+/* Default port configuration: RSS over IP with rx checksum offload. */
+static struct rte_eth_conf port_conf = {
+	.rxmode = {
+		.mq_mode = ETH_MQ_RX_RSS,
+		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
+		.split_hdr_size = 0,
+		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
+	},
+	.rx_adv_conf = {
+		.rss_conf = {
+			.rss_key = NULL,
+			.rss_hf = ETH_RSS_IP,
+		},
+	},
+	.txmode = {
+		.mq_mode = ETH_MQ_TX_NONE,
+	},
+};
+
+/* Single mbuf pool shared by all ports/queues (SOCKET_ID_ANY). */
+static struct rte_mempool *pktmbuf_pool;
+
+/* Validate every (port, queue, lcore) tuple collected in lcore_params:
+ * queue id in range, lcore enabled in the coremask, port enabled in the
+ * portmask and actually present.  Warns (but does not fail) when an
+ * lcore sits on a non-zero NUMA socket.
+ * Returns 0 when all entries are valid, -1 otherwise.
+ */
+static int
+check_lcore_params(void)
+{
+	uint16_t idx;
+
+	for (idx = 0; idx < nb_lcore_params; ++idx) {
+		uint8_t queue = lcore_params[idx].queue_id;
+		uint8_t lcore = lcore_params[idx].lcore_id;
+		uint16_t port_id = lcore_params[idx].port_id;
+		int socketid;
+
+		if (queue >= MAX_RX_QUEUE_PER_PORT) {
+			RTE_LOG(ERR, L3FWD, "Invalid queue number: %hhu\n",
+				queue);
+			return -1;
+		}
+		if (!rte_lcore_is_enabled(lcore)) {
+			RTE_LOG(ERR, L3FWD, "lcore %hhu is not enabled "
+				"in lcore mask\n", lcore);
+			return -1;
+		}
+		if ((enabled_port_mask & (1 << port_id)) == 0) {
+			RTE_LOG(ERR, L3FWD, "port %u is not enabled "
+				"in port mask\n", port_id);
+			return -1;
+		}
+		if (!rte_eth_dev_is_valid_port(port_id)) {
+			RTE_LOG(ERR, L3FWD, "port %u is not present "
+				"on the board\n", port_id);
+			return -1;
+		}
+		socketid = rte_lcore_to_socket_id(lcore);
+		if (socketid != 0) {
+			RTE_LOG(WARNING, L3FWD,
+				"lcore %hhu is on socket %d with numa off\n",
+				lcore, socketid);
+		}
+	}
+	return 0;
+}
+
+/* For every configured port that has no proxy yet, create a default
+ * proxy interface, bind the two, and append a matching rx-queue entry
+ * for the proxy (same lcore/queue as the port it is bound to) so that
+ * packets arriving on the proxy are serviced as well.
+ * Returns 0 on success, -1 when lcore_params has no room left or proxy
+ * creation fails.
+ */
+static int
+add_proxies(void)
+{
+	uint16_t i, p, port_id, proxy_id;
+
+	for (i = 0, p = nb_lcore_params; i < nb_lcore_params; ++i) {
+		if (p >= RTE_DIM(lcore_params)) {
+			RTE_LOG(ERR, L3FWD, "Not enough room in lcore_params "
+				"to add proxy\n");
+			return -1;
+		}
+		port_id = lcore_params[i].port_id;
+		/* Skip ports that already have a proxy bound. */
+		if (rte_ifpx_proxy_get(port_id) != RTE_MAX_ETHPORTS)
+			continue;
+
+		proxy_id = rte_ifpx_proxy_create(RTE_IFPX_DEFAULT);
+		if (proxy_id == RTE_MAX_ETHPORTS) {
+			RTE_LOG(ERR, L3FWD, "Failed to create proxy\n");
+			return -1;
+		}
+		rte_ifpx_port_bind(port_id, proxy_id);
+		/* mark proxy as enabled - the corresponding port is, since we
+		 * are after checking of lcore_params
+		 */
+		enabled_port_mask |= 1 << proxy_id;
+		lcore_params[p].port_id = proxy_id;
+		lcore_params[p].lcore_id = lcore_params[i].lcore_id;
+		lcore_params[p].queue_id = lcore_params[i].queue_id;
+		++p;
+	}
+
+	nb_lcore_params = p;
+	return 0;
+}
+
+/* Count the rx queues configured for @port in lcore_params, verifying
+ * that their ids form the contiguous sequence 0, 1, 2, ...
+ * Exits the application when the sequence is broken.
+ */
+static uint8_t
+get_port_n_rx_queues(const uint16_t port)
+{
+	int last_queue = -1;
+	uint16_t idx;
+
+	for (idx = 0; idx < nb_lcore_params; ++idx) {
+		if (lcore_params[idx].port_id != port)
+			continue;
+		if (lcore_params[idx].queue_id != last_queue + 1)
+			rte_exit(EXIT_FAILURE, "queue ids of the port %d must be"
+					" in sequence and must start with 0\n",
+					lcore_params[idx].port_id);
+		last_queue = lcore_params[idx].queue_id;
+	}
+	return (uint8_t)(last_queue + 1);
+}
+
+/* Distribute the configured (port, queue) pairs into per-lcore rx lists.
+ * For proxy ports, dst_port is set to the bound physical port so the
+ * main loop can forward without a route lookup; for regular ports (and
+ * proxies with nothing bound) dst_port is RTE_MAX_ETHPORTS.
+ * Returns 0 on success, -1 when an lcore exceeds MAX_RX_QUEUE_PER_LCORE.
+ */
+static int
+init_lcore_rx_queues(void)
+{
+	uint16_t i, p, nb_rx_queue;
+	uint8_t lcore;
+	struct lcore_rx_queue *rq;
+
+	for (i = 0; i < nb_lcore_params; ++i) {
+		lcore = lcore_params[i].lcore_id;
+		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+			RTE_LOG(ERR, L3FWD,
+				"too many queues (%u) for lcore: %u\n",
+				(unsigned int)nb_rx_queue + 1,
+				(unsigned int)lcore);
+			return -1;
+		}
+		rq = &lcore_conf[lcore].rx_queue_list[nb_rx_queue];
+		rq->port_id = lcore_params[i].port_id;
+		rq->queue_id = lcore_params[i].queue_id;
+		if (rte_ifpx_is_proxy(rq->port_id)) {
+			if (rte_ifpx_port_get(rq->port_id, &p, 1) > 0) {
+				rq->dst_port = p;
+			} else {
+				RTE_LOG(WARNING, L3FWD,
+					"Found proxy that has no port bound\n");
+				/* Fall back to normal routing - leaving
+				 * dst_port unset here would alias port 0
+				 * on the proxy fast path.
+				 */
+				rq->dst_port = RTE_MAX_ETHPORTS;
+			}
+		} else
+			rq->dst_port = RTE_MAX_ETHPORTS;
+		lcore_conf[lcore].n_rx_queue++;
+	}
+	return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+	fprintf(stderr, "%s [EAL options] --"
+		" -p PORTMASK"
+		" [-P]"
+		" --config (port,queue,lcore)[,(port,queue,lcore)]"
+		" [--ipv6]"
+		" [--parse-ptype]"
+		/* Terminate the synopsis line - without this the option
+		 * descriptions below are printed glued to it.
+		 */
+		"\n\n"
+
+		" -p PORTMASK: Hexadecimal bitmask of ports to configure\n"
+		" -P : Enable promiscuous mode\n"
+		" --config (port,queue,lcore): Rx queue configuration\n"
+		" --ipv6: Set if running ipv6 packets\n"
+		" --parse-ptype: Set to use software to analyze packet type\n",
+		prgname);
+}
+
+/* Parse a hexadecimal port mask string.
+ * Returns the mask, or 0 when the string is not a valid non-zero hex
+ * number.  0 (not -1) is the error value on purpose: the caller stores
+ * the result in uint32_t enabled_port_mask and checks for == 0, so a
+ * returned -1 would wrap to 0xffffffff and the error would go unnoticed.
+ */
+static int
+parse_portmask(const char *portmask)
+{
+	char *end = NULL;
+	unsigned long pm;
+
+	/* parse hexadecimal string */
+	pm = strtoul(portmask, &end, 16);
+	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return 0;
+
+	/* pm == 0 falls through naturally: an empty mask is also invalid. */
+	return pm;
+}
+
+/* Parse the --config option: a list of "(port,queue,lcore)" triples.
+ * Resets and refills the global lcore_params/nb_lcore_params arrays.
+ * Each field must parse as an integer <= 255.
+ * Returns 0 on success, -1 on any malformed triple or overflow of
+ * MAX_LCORE_PARAMS.
+ */
+static int
+parse_config(const char *q_arg)
+{
+	char s[256];
+	const char *p, *p0 = q_arg;
+	char *end;
+	enum fieldnames {
+		FLD_PORT = 0,
+		FLD_QUEUE,
+		FLD_LCORE,
+		_NUM_FLD
+	};
+	unsigned long int_fld[_NUM_FLD];
+	char *str_fld[_NUM_FLD];
+	int i;
+	unsigned int size;
+
+	nb_lcore_params = 0;
+
+	/* Scan "(...)" groups; p points past '(' and p0 at the ')'. */
+	while ((p = strchr(p0, '(')) != NULL) {
+		++p;
+		p0 = strchr(p, ')');
+		if (p0 == NULL)
+			return -1;
+
+		size = p0 - p;
+		if (size >= sizeof(s))
+			return -1;
+
+		/* Copy the triple into a scratch buffer and split on ','. */
+		snprintf(s, sizeof(s), "%.*s", size, p);
+		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
+				_NUM_FLD)
+			return -1;
+		for (i = 0; i < _NUM_FLD; i++) {
+			errno = 0;
+			int_fld[i] = strtoul(str_fld[i], &end, 0);
+			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+				return -1;
+		}
+		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+			RTE_LOG(ERR, L3FWD, "exceeded max number of lcore "
+				"params: %hu\n", nb_lcore_params);
+			return -1;
+		}
+		lcore_params[nb_lcore_params].port_id =
+			(uint8_t)int_fld[FLD_PORT];
+		lcore_params[nb_lcore_params].queue_id =
+			(uint8_t)int_fld[FLD_QUEUE];
+		lcore_params[nb_lcore_params].lcore_id =
+			(uint8_t)int_fld[FLD_LCORE];
+		++nb_lcore_params;
+	}
+	return 0;
+}
+
+#define MAX_JUMBO_PKT_LEN 9600
+#define MEMPOOL_CACHE_SIZE 256
+
+/* NOTE(review): 'L' and 'E' are accepted here but have no case in
+ * parse_args() - they fall through to the default branch and trigger
+ * the usage message.  Presumably leftovers from the original l3fwd;
+ * confirm and drop.
+ */
+static const char short_options[] =
+	"p:"  /* portmask */
+	"P"   /* promiscuous */
+	"L"   /* enable long prefix match */
+	"E"   /* enable exact match */
+	;
+
+#define CMD_LINE_OPT_CONFIG "config"
+#define CMD_LINE_OPT_IPV6 "ipv6"
+#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
+enum {
+	/* long options mapped to a short option */
+
+	/* first long only option value must be >= 256, so that we won't
+	 * conflict with short options
+	 */
+	CMD_LINE_OPT_MIN_NUM = 256,
+	CMD_LINE_OPT_CONFIG_NUM,
+	CMD_LINE_OPT_PARSE_PTYPE_NUM,
+};
+
+static const struct option lgopts[] = {
+	{CMD_LINE_OPT_CONFIG, 1, 0, CMD_LINE_OPT_CONFIG_NUM},
+	{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, CMD_LINE_OPT_PARSE_PTYPE_NUM},
+	{NULL, 0, 0, 0}
+};
+
+/*
+ * This expression is used to calculate the number of mbufs needed
+ * depending on user input, taking into account memory for rx and
+ * tx hardware rings, cache per lcore and mtable per port per lcore.
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
+ * value of 8192
+ */
+/* NOTE(review): NB_MBUF appears unused - main() sizes the pool
+ * incrementally instead.  Confirm and remove.
+ */
+#define NB_MBUF(nports) RTE_MAX(	\
+	(nports*nb_rx_queue*nb_rxd +	\
+	nports*nb_lcores*MAX_PKT_BURST +	\
+	nports*n_tx_queue*nb_txd +	\
+	nb_lcores*MEMPOOL_CACHE_SIZE),	\
+	8192U)
+
+/* Parse the argument given in the command line of the application */
+/* Fills enabled_port_mask / promiscuous_on / parse_ptype / lcore_params
+ * (defaults applied when --config is absent).  Returns the index of the
+ * last consumed argv entry (for the caller to skip past), or -1 on bad
+ * input.  Resets optind so getopt can be reused afterwards.
+ */
+static int
+parse_args(int argc, char **argv)
+{
+	int opt, ret;
+	char **argvopt;
+	int option_index;
+	char *prgname = argv[0];
+
+	argvopt = argv;
+
+	/* Error or normal output strings. */
+	while ((opt = getopt_long(argc, argvopt, short_options,
+				lgopts, &option_index)) != EOF) {
+
+		switch (opt) {
+		/* portmask */
+		case 'p':
+			enabled_port_mask = parse_portmask(optarg);
+			/* NOTE(review): ensure parse_portmask() reports
+			 * errors as 0 - an error value of -1 would wrap to
+			 * 0xffffffff in this uint32_t and evade this check.
+			 */
+			if (enabled_port_mask == 0) {
+				RTE_LOG(ERR, L3FWD, "Invalid portmask\n");
+				print_usage(prgname);
+				return -1;
+			}
+			break;
+
+		case 'P':
+			promiscuous_on = 1;
+			break;
+
+		/* long options */
+		case CMD_LINE_OPT_CONFIG_NUM:
+			ret = parse_config(optarg);
+			if (ret) {
+				RTE_LOG(ERR, L3FWD, "Invalid config\n");
+				print_usage(prgname);
+				return -1;
+			}
+			break;
+
+		case CMD_LINE_OPT_PARSE_PTYPE_NUM:
+			RTE_LOG(INFO, L3FWD, "soft parse-ptype is enabled\n");
+			parse_ptype = 1;
+			break;
+
+		default:
+			print_usage(prgname);
+			return -1;
+		}
+	}
+
+	/* No --config given: fall back to the built-in default mapping. */
+	if (nb_lcore_params == 0) {
+		memcpy(lcore_params, lcore_params_default,
+		       sizeof(lcore_params_default));
+		nb_lcore_params = RTE_DIM(lcore_params_default);
+	}
+
+	if (optind >= 0)
+		argv[optind-1] = prgname;
+
+	ret = optind-1;
+	optind = 1; /* reset getopt lib */
+	return ret;
+}
+
+/* SIGINT/SIGTERM handler: request a graceful shutdown of all lcores by
+ * raising the force_quit flag checked in the main loops.
+ */
+static void
+signal_handler(int signum)
+{
+	if (signum != SIGINT && signum != SIGTERM)
+		return;
+
+	RTE_LOG(NOTICE, L3FWD,
+		"\n\nSignal %d received, preparing to exit...\n",
+		signum);
+	force_quit = true;
+}
+
+/* Make sure packet-type information will be available on the given rx
+ * queue: with --parse-ptype a software classification callback is
+ * registered, otherwise the device's own ptype support is verified.
+ * Returns 1 when ptype parsing is ensured, 0 on failure (note the
+ * inverted-looking convention: non-zero means success here).
+ */
+static int
+prepare_ptype_parser(uint16_t portid, uint16_t queueid)
+{
+	if (parse_ptype) {
+		RTE_LOG(INFO, L3FWD, "Port %d: softly parse packet type info\n",
+			portid);
+		/* rte_eth_add_rx_callback() returns a non-NULL handle on
+		 * success.
+		 */
+		if (rte_eth_add_rx_callback(portid, queueid,
+					    lpm_cb_parse_ptype,
+					    NULL))
+			return 1;
+
+		RTE_LOG(ERR, L3FWD, "Failed to add rx callback: port=%d\n",
+			portid);
+		return 0;
+	}
+
+	if (lpm_check_ptype(portid))
+		return 1;
+
+	RTE_LOG(ERR, L3FWD,
+		"port %d cannot parse packet type, please add --%s\n",
+		portid, CMD_LINE_OPT_PARSE_PTYPE);
+	return 0;
+}
+
+/* Application entry point: EAL init, argument parsing, proxy creation,
+ * port/queue configuration, mbuf pool creation, port start, IF Proxy
+ * registration, per-lcore main-loop launch, and orderly shutdown.
+ */
+int
+main(int argc, char **argv)
+{
+	struct lcore_conf *lconf;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_txconf *txconf;
+	int ret;
+	unsigned int nb_ports;
+	uint32_t nb_mbufs;
+	uint16_t queueid, portid;
+	unsigned int lcore_id;
+	uint32_t nb_tx_queue, nb_lcores;
+	uint8_t nb_rx_queue, queue;
+
+	/* init EAL */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+	argc -= ret;
+	argv += ret;
+
+	force_quit = false;
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
+	/* parse application arguments (after the EAL ones) */
+	ret = parse_args(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
+
+	if (check_lcore_params() < 0)
+		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+	/* Create and bind a proxy interface for every configured port. */
+	if (add_proxies() < 0)
+		rte_exit(EXIT_FAILURE, "add_proxies failed\n");
+
+	ret = init_lcore_rx_queues();
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+	nb_ports = rte_eth_dev_count_avail();
+
+	nb_lcores = rte_lcore_count();
+
+	/* Initial number of mbufs in pool - the amount required for hardware
+	 * rx/tx rings will be added during configuration of ports.
+	 */
+	nb_mbufs = nb_ports * nb_lcores * MAX_PKT_BURST + /* mbuf tables */
+		nb_lcores * MEMPOOL_CACHE_SIZE; /* per lcore cache */
+
+	/* Init the lookup structures. */
+	setup_lpm();
+
+	/* initialize all ports (including proxies) */
+	RTE_ETH_FOREACH_DEV(portid) {
+		struct rte_eth_conf local_port_conf = port_conf;
+
+		/* skip ports that are not enabled */
+		if ((enabled_port_mask & (1 << portid)) == 0) {
+			RTE_LOG(INFO, L3FWD, "Skipping disabled port %d\n",
+				portid);
+			continue;
+		}
+
+		/* init port */
+		RTE_LOG(INFO, L3FWD, "Initializing port %d ...\n", portid);
+
+		nb_rx_queue = get_port_n_rx_queues(portid);
+		nb_tx_queue = nb_lcores;
+
+		ret = rte_eth_dev_info_get(portid, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"Error during getting device (port %u) info: %s\n",
+				portid, strerror(-ret));
+		if (nb_rx_queue > dev_info.max_rx_queues ||
+		    nb_tx_queue > dev_info.max_tx_queues)
+			rte_exit(EXIT_FAILURE,
+				"Port %d cannot configure enough queues\n",
+				portid);
+
+		RTE_LOG(INFO, L3FWD, "Creating queues: nb_rxq=%d nb_txq=%u...\n",
+			nb_rx_queue, nb_tx_queue);
+
+		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+			local_port_conf.txmode.offloads |=
+				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+		/* Restrict requested RSS hash functions to what the device
+		 * supports, and log when the set was narrowed.
+		 */
+		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
+			dev_info.flow_type_rss_offloads;
+		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
+		    port_conf.rx_adv_conf.rss_conf.rss_hf) {
+			RTE_LOG(INFO, L3FWD,
+				"Port %u modified RSS hash function based on hardware support,"
+				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
+				portid, port_conf.rx_adv_conf.rss_conf.rss_hf,
+				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
+		}
+
+		ret = rte_eth_dev_configure(portid, nb_rx_queue,
+					    (uint16_t)nb_tx_queue,
+					    &local_port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"Cannot configure device: err=%d, port=%d\n",
+				ret, portid);
+
+		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
+						       &nb_txd);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"Cannot adjust number of descriptors: err=%d, "
+				"port=%d\n", ret, portid);
+
+		/* Account this port's ring descriptors in the pool size. */
+		nb_mbufs += nb_rx_queue * nb_rxd + nb_tx_queue * nb_txd;
+		/* init one TX queue per couple (lcore,port) */
+		queueid = 0;
+		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+			if (rte_lcore_is_enabled(lcore_id) == 0)
+				continue;
+
+			RTE_LOG(INFO, L3FWD, "\ttxq=%u,%d\n", lcore_id,
+				queueid);
+
+			txconf = &dev_info.default_txconf;
+			txconf->offloads = local_port_conf.txmode.offloads;
+			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+						     SOCKET_ID_ANY, txconf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					"rte_eth_tx_queue_setup: err=%d, "
+					"port=%d\n", ret, portid);
+
+			lconf = &lcore_conf[lcore_id];
+			lconf->tx_queue_id[portid] = queueid;
+			queueid++;
+
+			lconf->tx_port_id[lconf->n_tx_port] = portid;
+			lconf->n_tx_port++;
+		}
+		RTE_LOG(INFO, L3FWD, "\n");
+	}
+
+	/* Init pkt pool. */
+	/* NOTE(review): rte_align32prevpow2() rounds *down*, so the pool may
+	 * end up smaller than the nb_mbufs estimate accumulated above -
+	 * confirm this is intentional (mempools prefer 2^q-1 sizes).
+	 */
+	pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool",
+			rte_align32prevpow2(nb_mbufs), MEMPOOL_CACHE_SIZE,
+			0, RTE_MBUF_DEFAULT_BUF_SIZE, SOCKET_ID_ANY);
+	if (pktmbuf_pool == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (rte_lcore_is_enabled(lcore_id) == 0)
+			continue;
+		lconf = &lcore_conf[lcore_id];
+		RTE_LOG(INFO, L3FWD, "Initializing rx queues on lcore %u ...\n",
+			lcore_id);
+		/* init RX queues */
+		for (queue = 0; queue < lconf->n_rx_queue; ++queue) {
+			struct rte_eth_rxconf rxq_conf;
+
+			portid = lconf->rx_queue_list[queue].port_id;
+			queueid = lconf->rx_queue_list[queue].queue_id;
+
+			RTE_LOG(INFO, L3FWD, "\trxq=%d,%d\n", portid, queueid);
+
+			ret = rte_eth_dev_info_get(portid, &dev_info);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					portid, strerror(-ret));
+
+			rxq_conf = dev_info.default_rxconf;
+			rxq_conf.offloads = port_conf.rxmode.offloads;
+			ret = rte_eth_rx_queue_setup(portid, queueid,
+						     nb_rxd, SOCKET_ID_ANY,
+						     &rxq_conf,
+						     pktmbuf_pool);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					"rte_eth_rx_queue_setup: err=%d, port=%d\n",
+					ret, portid);
+		}
+	}
+
+	RTE_LOG(INFO, L3FWD, "\n");
+
+	/* start ports */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if ((enabled_port_mask & (1 << portid)) == 0)
+			continue;
+
+		/* Start device */
+		ret = rte_eth_dev_start(portid);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start: err=%d, port=%d\n",
+				ret, portid);
+
+		/*
+		 * If enabled, put device in promiscuous mode.
+		 * This allows IO forwarding mode to forward packets
+		 * to itself through 2 cross-connected ports of the
+		 * target machine.
+		 */
+		if (promiscuous_on) {
+			ret = rte_eth_promiscuous_enable(portid);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					"rte_eth_promiscuous_enable: err=%s, port=%u\n",
+					rte_strerror(-ret), portid);
+		}
+	}
+	/* we've managed to start all enabled ports so active == enabled */
+	active_port_mask = enabled_port_mask;
+
+	RTE_LOG(INFO, L3FWD, "\n");
+
+	/* Ensure ptype classification is available on every rx queue. */
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (rte_lcore_is_enabled(lcore_id) == 0)
+			continue;
+		lconf = &lcore_conf[lcore_id];
+		for (queue = 0; queue < lconf->n_rx_queue; ++queue) {
+			portid = lconf->rx_queue_list[queue].port_id;
+			queueid = lconf->rx_queue_list[queue].queue_id;
+			if (prepare_ptype_parser(portid, queueid) == 0)
+				rte_exit(EXIT_FAILURE, "ptype check fails\n");
+		}
+	}
+
+	/* Register with IF Proxy and wait until the initial system
+	 * configuration has been processed before forwarding starts.
+	 */
+	if (init_if_proxy() < 0)
+		rte_exit(EXIT_FAILURE, "Failed to configure proxy lib\n");
+	wait_for_config_done();
+
+	ret = 0;
+	/* launch per-lcore init on every lcore */
+	rte_eal_mp_remote_launch(lpm_main_loop, NULL, CALL_MASTER);
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (rte_eal_wait_lcore(lcore_id) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	/* stop ports */
+	RTE_ETH_FOREACH_DEV(portid) {
+		if ((enabled_port_mask & (1 << portid)) == 0)
+			continue;
+		RTE_LOG(INFO, L3FWD, "Closing port %d...", portid);
+		rte_eth_dev_stop(portid);
+		rte_eth_dev_close(portid);
+		rte_log(RTE_LOG_INFO, RTE_LOGTYPE_L3FWD, " Done\n");
+	}
+
+	close_if_proxy();
+	RTE_LOG(INFO, L3FWD, "Bye...\n");
+
+	return ret;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Marvell International Ltd.
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+allow_experimental_apis = true
+deps += ['hash', 'lpm', 'if_proxy']
+sources = files('l3fwd.c', 'main.c')
@@ -25,8 +25,8 @@ all_examples = [
'l2fwd', 'l2fwd-cat', 'l2fwd-event',
'l2fwd-crypto', 'l2fwd-jobstats',
'l2fwd-keepalive', 'l3fwd',
- 'l3fwd-acl', 'l3fwd-power', 'l3fwd-graph',
- 'link_status_interrupt',
+ 'l3fwd-acl', 'l3fwd-graph', 'l3fwd-ifpx',
+ 'l3fwd-power', 'link_status_interrupt',
'multi_process/client_server_mp/mp_client',
'multi_process/client_server_mp/mp_server',
'multi_process/hotplug_mp',