@@ -64,5 +64,6 @@ DIRS-y += vhost
DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen
DIRS-y += vmdq
DIRS-y += vmdq_dcb
+DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += distributor_app
include $(RTE_SDK)/mk/rte.extsubdir.mk
new file mode 100644
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-default-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = distributor_app
+
+# all source files are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
new file mode 100644
@@ -0,0 +1,459 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_debug.h>
+#include <rte_distributor.h>
+#include "main.h"
+
+#define RX_RING_SIZE 256 /* ring size given to rte_eth_rx_queue_setup() */
+#define RX_FREE_THRESH 32
+#define RX_PTHRESH 8
+#define RX_HTHRESH 8
+#define RX_WTHRESH 0
+
+#define TX_RING_SIZE 512 /* ring size given to rte_eth_tx_queue_setup() */
+#define TX_FREE_THRESH 32
+#define TX_PTHRESH 32
+#define TX_HTHRESH 0
+#define TX_WTHRESH 0
+#define TX_RSBIT_THRESH 32
+#define TX_Q_FLAGS (ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOVLANOFFL |\
+ ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
+ ETH_TXQ_FLAGS_NOXSUMTCP)
+
+#define NUM_MBUFS ((64*1024)-1) /* mbufs per port; pool is NUM_MBUFS * nb_ports */
+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define MBUF_CACHE_SIZE 250 /* cache size argument of rte_mempool_create() */
+#define BURST_SIZE 32 /* packets per RX burst, ring dequeue and TX flush */
+#define RTE_RING_SZ 1024 /* size of the ring between the RX and TX cores */
+
+static volatile struct app_stats { /* counters printed by int_handler() */
+ struct {
+ uint64_t rx_pkts; /* packets returned by rte_eth_rx_burst() */
+ uint64_t returned_pkts; /* packets handed back by the distributor */
+ uint64_t enqueued_pkts; /* packets placed on the output ring */
+ } rx __rte_cache_aligned;
+
+ struct {
+ uint64_t dequeue_pkts; /* packets taken off the output ring */
+ uint64_t tx_pkts; /* packets accepted by rte_eth_tx_burst() */
+ } tx __rte_cache_aligned;
+} app_stats;
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC not stripped by hardware */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .lpbk_mode = 0,
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6 |
+ ETH_RSS_IPV4_TCP | ETH_RSS_IPV4_UDP |
+ ETH_RSS_IPV6_TCP | ETH_RSS_IPV6_UDP,
+ }
+ },
+};
+
+static const struct rte_eth_rxconf rx_conf_default = { /* RX queue setup */
+ .rx_thresh = {
+ .pthresh = RX_PTHRESH,
+ .hthresh = RX_HTHRESH,
+ .wthresh = RX_WTHRESH,
+ },
+ .rx_free_thresh = RX_FREE_THRESH,
+ .rx_drop_en = 0,
+};
+
+static const struct rte_eth_txconf tx_conf_default = { /* TX queue setup */
+ .tx_thresh = {
+ .pthresh = TX_PTHRESH,
+ .hthresh = TX_HTHRESH,
+ .wthresh = TX_WTHRESH,
+ },
+ .tx_free_thresh = TX_FREE_THRESH,
+ .tx_rs_thresh = TX_RSBIT_THRESH,
+ .txq_flags = TX_Q_FLAGS
+
+};
+
+struct output_buffer { /* per-port staging area for packets awaiting TX */
+ unsigned count; /* number of valid entries in mbufs[] */
+ struct rte_mbuf *mbufs[BURST_SIZE + 3]; /* flushed at count == BURST_SIZE */
+};
+
+/*
+ * Configures and starts a port with one RX queue fed from mbuf_pool and
+ * rte_lcore_count() - 1 TX queues. Returns 0 on success, non-zero on error.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rxRings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ &rx_conf_default, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < txRings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ &tx_conf_default);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+ /* if the link is not up yet, wait one second and check once more */
+ struct rte_eth_link link;
+ rte_eth_link_get_nowait(port, &link);
+ if (!link.link_status) {
+ sleep(1);
+ rte_eth_link_get_nowait(port, &link);
+ }
+ /* a link that is still down is reported but not treated as an error */
+ if (!link.link_status) {
+ printf("Link down on port %"PRIu8"\n", port);
+ return 0; /* MAC print and promiscuous mode are skipped */
+ }
+
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
+
+struct lcore_params { /* argument block for lcore_rx() and lcore_worker() */
+ unsigned worker_id; /* id passed to rte_distributor_get_pkt() */
+ struct rte_distributor *d; /* distributor shared by RX and workers */
+ struct rte_ring *r; /* output ring; only lcore_rx() reads this field */
+};
+
+/* RX core: polls the ports round-robin, passes each burst to the distributor
+ * and moves the packets the workers have returned onto the output ring. */
+static __attribute__((noreturn)) void
+lcore_rx(struct lcore_params *p)
+{
+	struct rte_distributor *d = p->d;
+	struct rte_ring *r = p->r;
+	const uint8_t nb_ports = rte_eth_dev_count();
+	const int socket_id = rte_socket_id();
+	uint8_t port;
+
+	for (port = 0; port < nb_ports; port++)
+		if (rte_eth_dev_socket_id(port) > 0 &&
+				rte_eth_dev_socket_id(port) != socket_id)
+			printf("WARNING, port %u is on remote NUMA node to "
+					"RX thread.\n\tPerformance will not "
+					"be optimal.\n", port);
+
+	printf("\nCore %u doing packet RX.\n", rte_lcore_id());
+	/* The port step is in the for-statement so it also runs on 'continue';
+	 * otherwise an idle port is polled forever, starving the others. */
+	for (port = 0; ; port = (port + 1 < nb_ports) ? port + 1 : 0) {
+		struct rte_mbuf *bufs[BURST_SIZE*2];
+		const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
+				BURST_SIZE);
+		app_stats.rx.rx_pkts += nb_rx;
+		rte_distributor_process(d, bufs, nb_rx);
+		const uint16_t nb_ret = rte_distributor_returned_pkts(d,
+				bufs, BURST_SIZE*2);
+		app_stats.rx.returned_pkts += nb_ret;
+		if (unlikely(nb_ret == 0))
+			continue;
+
+		uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret);
+		app_stats.rx.enqueued_pkts += sent;
+		if (unlikely(sent < nb_ret)) {
+			printf("Packet loss due to full ring\n");
+			while (sent < nb_ret)
+				rte_pktmbuf_free(bufs[sent++]);
+		}
+	}
+}
+
+/* Transmits the packets staged for port outp on TX queue 0; packets the
+ * driver does not accept are freed, and the staging buffer is emptied. */
+static inline void
+flush_one_port(struct output_buffer *outbuf, uint8_t outp)
+{
+	unsigned sent = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
+			outbuf->count);
+	app_stats.tx.tx_pkts += sent;
+	if (unlikely(sent < outbuf->count)) {
+		printf("Packet loss with tx_burst\n");
+		for (; sent < outbuf->count; sent++)
+			rte_pktmbuf_free(outbuf->mbufs[sent]);
+	}
+	outbuf->count = 0;
+}
+
+/* Flushes the staging buffer of every port that has packets pending. */
+static inline void
+flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports)
+{
+	uint8_t p;
+
+	for (p = 0; p < nb_ports; p++) {
+		if (tx_buffers[p].count != 0)
+			flush_one_port(&tx_buffers[p], p);
+	}
+}
+
+/* TX core: drains the output ring, stages each packet in the buffer of the
+ * port named by its in_port field and transmits a buffer once it is full. */
+static __attribute__((noreturn)) void
+lcore_tx(struct rte_ring *in_r)
+{
+	static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
+	const uint8_t nb_ports = rte_eth_dev_count();
+	const int socket_id = rte_socket_id();
+	uint8_t port;
+
+	for (port = 0; port < nb_ports; port++)
+		if (rte_eth_dev_socket_id(port) > 0 &&
+				rte_eth_dev_socket_id(port) != socket_id)
+			printf("WARNING, port %u is on remote NUMA node to "
+					"TX thread.\n\tPerformance will not "
+					"be optimal.\n", port);
+
+	printf("\nCore %u doing packet TX.\n", rte_lcore_id());
+	for (;;) {
+		for (port = 0; port < nb_ports; port++) {
+			struct rte_mbuf *bufs[BURST_SIZE];
+			const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
+					(void *)bufs, BURST_SIZE);
+			app_stats.tx.dequeue_pkts += nb_rx;
+			/* if we get no traffic, flush anything we have */
+			if (unlikely(nb_rx == 0)) {
+				flush_all_ports(tx_buffers, nb_ports);
+				continue;
+			}
+
+			/* for traffic we receive, queue it up for transmit */
+			uint16_t i;
+			/* only prefetch slots the dequeue actually filled */
+			for (i = 0; i < 3 && i < nb_rx; i++)
+				_mm_prefetch(bufs[i], 0);
+			for (i = 0; i < nb_rx; i++) {
+				struct output_buffer *outbuf;
+				if (i + 3 < nb_rx)
+					_mm_prefetch(bufs[i + 3], 0);
+				/* workers put the output port in in_port */
+				const uint8_t outp = bufs[i]->pkt.in_port;
+				outbuf = &tx_buffers[outp];
+				outbuf->mbufs[outbuf->count++] = bufs[i];
+				if (outbuf->count == BURST_SIZE)
+					flush_one_port(outbuf, outp);
+			}
+		}
+	}
+}
+
+
+/* Worker core: takes packets from the distributor and rewrites in_port so
+ * that it names the port the packet should be transmitted on. */
+static __attribute__((noreturn)) void
+lcore_worker(struct lcore_params *p)
+{
+	struct rte_distributor *dist = p->d;
+	const unsigned wkr_id = p->worker_id;
+	/* 0 for a single port (no change); else 1, pairing 0<->1, 2<->3 */
+	const unsigned port_mask = (rte_eth_dev_count() > 1);
+	struct rte_mbuf *pkt = NULL;
+
+	printf("\nCore %u acting as worker core.\n", rte_lcore_id());
+	while (1) {
+		pkt = rte_distributor_get_pkt(dist, wkr_id, pkt);
+		pkt->pkt.in_port ^= port_mask;
+	}
+}
+
+/* SIGINT handler: prints the RX/TX thread counters and the statistics of
+ * every port, then terminates the process with exit status 0. */
+static void
+int_handler(int sig_num)
+{
+	struct rte_eth_stats eth_stats;
+	unsigned i;
+
+	printf("Exiting on signal %d\n", sig_num);
+	printf("\nRX thread stats:\n");
+	printf(" - Received: %"PRIu64"\n", app_stats.rx.rx_pkts);
+	printf(" - Processed: %"PRIu64"\n", app_stats.rx.returned_pkts);
+	printf(" - Enqueued: %"PRIu64"\n", app_stats.rx.enqueued_pkts);
+	printf("\nTX thread stats:\n");
+	printf(" - Dequeued: %"PRIu64"\n", app_stats.tx.dequeue_pkts);
+	printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts);
+
+	for (i = 0; i < rte_eth_dev_count(); i++) {
+		rte_eth_stats_get(i, &eth_stats);
+		printf("\nPort %u stats:\n", i);
+		printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets);
+		printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets);
+		printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors);
+		printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors);
+		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
+	}
+	exit(0);
+}
+
+/* Sets up EAL, ports, distributor and output ring, then starts all lcores */
+int
+MAIN(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ struct rte_distributor *d;
+ struct rte_ring *output_ring;
+ unsigned lcore_id, worker_id = 0;
+ unsigned nb_ports;
+ uint8_t portid;
+
+ /* catch ctrl-c so we can print on exit */
+ signal(SIGINT, int_handler);
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret; /* skip the arguments EAL parsed */
+ argv += ret;
+
+ if (rte_lcore_count() < 3)
+ rte_exit(EXIT_FAILURE, "Error, This application needs at "
+ "least 3 logical cores to run:\n"
+ "1 lcore for packet RX and distribution\n"
+ "1 lcore for packet TX\n"
+ "and at least 1 lcore for worker threads\n");
+
+ if (rte_eal_pci_probe() != 0)
+ rte_exit(EXIT_FAILURE, "Error with PCI probing\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
+ if (nb_ports != 1 && (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
+ "when using a single port\n");
+
+ mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
+ portid);
+ /* every lcore except the RX (master) and TX cores is a worker */
+ d = rte_distributor_create("PKT_DIST", rte_socket_id(),
+ rte_lcore_count() - 2);
+ if (d == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create distributor\n");
+
+ /* single-consumer ring (RING_F_SC_DEQ): only the TX core dequeues from
+ * it; in this file lcore_rx() is the only function that enqueues to it
+ */
+ output_ring = rte_ring_create("Output_ring", RTE_RING_SZ,
+ rte_socket_id(), RING_F_SC_DEQ);
+ if (output_ring == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (worker_id == rte_lcore_count() - 2) /* last slave lcore runs TX */
+ rte_eal_remote_launch((lcore_function_t *)lcore_tx,
+ output_ring, lcore_id);
+ else {
+ struct lcore_params *p =
+ rte_malloc(NULL, sizeof(*p), 0);
+ if (!p)
+ rte_panic("malloc failure\n");
+ *p = (struct lcore_params){worker_id, d, output_ring};
+ rte_eal_remote_launch((lcore_function_t *)lcore_worker,
+ p, lcore_id);
+ }
+ worker_id++;
+ }
+ /* run the RX/distribution loop on the master core; it never returns */
+ struct lcore_params p = { 0, d, output_ring };
+ lcore_rx(&p);
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+/* Entry point name: _main for bare-metal builds, main everywhere else. */
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#define MAIN _main
+#else
+#define MAIN main
+#endif
+
+int MAIN(int argc, char *argv[]);
+
+#endif /* ifndef _MAIN_H_ */