@@ -77,6 +77,42 @@ struct __rte_cache_aligned lcore_rx_queue {
uint16_t queue_id;
};
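+/*
+ * Worker queue modes:
+ * POLL  - no worker queues, I/O (poll) lcores do the lookups themselves;
+ * UNQUE - unordered worker queues built from a pair of rte_rings;
+ * ORQUE - ordered worker queues built on top of rte_soring, so packets
+ *         are returned to the I/O lcore in their arrival order.
+ */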
+enum L3FWD_WORKER_MODE {
+ L3FWD_WORKER_POLL,
+ L3FWD_WORKER_UNQUE,
+ L3FWD_WORKER_ORQUE,
+};
+
+struct l3fwd_wqp_param {
+ enum L3FWD_WORKER_MODE mode;
+ uint32_t qsize; /**< Number of elems in worker queue */
+ int32_t single; /**< use single queue per I/O (poll) thread */
+};
+
+extern struct l3fwd_wqp_param l3fwd_wqp_param;
+
+enum {
+ LCORE_WQ_IN,
+ LCORE_WQ_OUT,
+ LCORE_WQ_NUM,
+};
+
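+/*
+ * Per worker-queue state: UNQUE mode uses a pair of rte_rings (input
+ * and output), while ORQUE mode uses one rte_soring plus the flow
+ * token returned by acquire and consumed by the matching release.
+ */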
+union lcore_wq {
+ struct rte_ring *r[LCORE_WQ_NUM];
+ struct {
+ struct rte_soring *sor;
+ /* used by WQ, sort of a thread-local variable */
+ uint32_t ftoken;
+ };
+};
+
+struct lcore_wq_pool {
+ uint32_t nb_queue;
+ uint32_t qmask;
+ union lcore_wq queue[MAX_RX_QUEUE_PER_LCORE];
+ struct l3fwd_wqp_param prm;
+};
+
struct __rte_cache_aligned lcore_conf {
uint16_t n_rx_queue;
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
@@ -86,6 +122,7 @@ struct __rte_cache_aligned lcore_conf {
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
void *ipv4_lookup_struct;
void *ipv6_lookup_struct;
+ struct lcore_wq_pool wqpool;
};
extern volatile bool force_quit;
@@ -115,6 +152,8 @@ extern struct acl_algorithms acl_alg[];
extern uint32_t max_pkt_len;
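+/*
+ * Number of times to repeat each lookup (set with --lookup-iter),
+ * handy for emulating a heavier lookup stage.
+ */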
+extern uint32_t l3fwd_lookup_iter_num;
+
/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
@@ -308,6 +347,22 @@ fib_event_main_loop_tx_q_vector(__rte_unused void *dummy);
int
fib_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_d(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_d_burst(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_q(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_q_burst(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_d_vector(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_q_vector(__rte_unused void *dummy);
+int
+acl_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
/* Return ipv4/ipv6 fwd lookup struct for ACL, LPM, EM or FIB. */
void *
@@ -4,6 +4,7 @@
#include "l3fwd.h"
#include "l3fwd_route.h"
+#include "l3fwd_wqp.h"
/*
* Rule and trace formats definitions.
@@ -1003,19 +1004,21 @@ acl_process_pkts(struct rte_mbuf *pkts[MAX_PKT_BURST],
/* split packets burst depending on packet type (IPv4/IPv6) */
l3fwd_acl_prepare_acl_parameter(pkts, &acl_search, num);
- if (acl_search.num_ipv4)
- rte_acl_classify(acl_config.acx_ipv4[socketid],
+ for (i = l3fwd_lookup_iter_num; i != 0; i--) {
+ if (acl_search.num_ipv4)
+ rte_acl_classify(acl_config.acx_ipv4[socketid],
acl_search.data_ipv4,
acl_search.res_ipv4,
acl_search.num_ipv4,
DEFAULT_MAX_CATEGORIES);
- if (acl_search.num_ipv6)
- rte_acl_classify(acl_config.acx_ipv6[socketid],
+ if (acl_search.num_ipv6)
+ rte_acl_classify(acl_config.acx_ipv6[socketid],
acl_search.data_ipv6,
acl_search.res_ipv6,
acl_search.num_ipv6,
DEFAULT_MAX_CATEGORIES);
+ }
/* combine lookup results back, into one array of next hops */
n4 = 0;
@@ -1042,34 +1045,36 @@ acl_process_pkts(struct rte_mbuf *pkts[MAX_PKT_BURST],
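+/*
+ * 'step3' tells whether the final header rewrite (processx4_step3() /
+ * process_packet()) still needs to be done here, or has already been
+ * performed by a worker thread.
+ */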
static inline void
acl_send_packets(struct lcore_conf *qconf, struct rte_mbuf *pkts[],
- uint16_t hops[], uint32_t num)
+ uint16_t hops[], uint32_t num, int step3)
{
#if defined ACL_SEND_MULTI
- send_packets_multi(qconf, pkts, hops, num);
+ __send_packets_multi(qconf, pkts, hops, num, step3);
#else
- send_packets_single(qconf, pkts, hops, num);
+ if (step3 != 0)
+ send_packets_single(qconf, pkts, hops, num);
+ else {
+ uint32_t i;
+ for (i = 0; i != num; i++)
+ send_single_packet(qconf, pkts[i], hops[i]);
+ }
#endif
}
/* main processing loop */
-int
-acl_main_loop(__rte_unused void *dummy)
+static int
+acl_poll_loop(struct lcore_conf *qconf, uint32_t lcore_id)
{
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
uint16_t hops[MAX_PKT_BURST];
- unsigned int lcore_id;
uint64_t prev_tsc, diff_tsc, cur_tsc;
- int i, nb_rx;
+ uint32_t i, n, nb_rx;
uint16_t portid;
uint16_t queueid;
- struct lcore_conf *qconf;
int socketid;
const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
/ US_PER_S * BURST_TX_DRAIN_US;
prev_tsc = 0;
- lcore_id = rte_lcore_id();
- qconf = &lcore_conf[lcore_id];
socketid = rte_lcore_to_socket_id(lcore_id);
if (qconf->n_rx_queue == 0) {
@@ -1121,17 +1126,99 @@ acl_main_loop(__rte_unused void *dummy)
nb_rx = rte_eth_rx_burst(portid, queueid,
pkts_burst, MAX_PKT_BURST);
- if (nb_rx > 0) {
- acl_process_pkts(pkts_burst, hops, nb_rx,
- socketid);
- acl_send_packets(qconf, pkts_burst, hops,
- nb_rx);
+ if (nb_rx != 0) {
+ if (l3fwd_wqp_param.mode == L3FWD_WORKER_POLL) {
+ acl_process_pkts(pkts_burst, hops,
+ nb_rx, socketid);
+ acl_send_packets(qconf, pkts_burst,
+ hops, nb_rx, 1);
+ } else {
+ n = lcore_wq_submit(&qconf->wqpool, i,
+ pkts_burst, nb_rx);
+ if (n != nb_rx) {
+ /* update stats counter */
+ rte_pktmbuf_free_bulk(
+ pkts_burst + n,
+ nb_rx - n);
+ }
+ }
+ }
+ if (l3fwd_wqp_param.mode != L3FWD_WORKER_POLL) {
+ nb_rx = lcore_wq_receive(&qconf->wqpool, i,
+ pkts_burst, hops, MAX_PKT_BURST);
+ if (nb_rx != 0)
+ acl_send_packets(qconf, pkts_burst,
+ hops, nb_rx, 0);
+ }
+ }
+ }
+ return 0;
+}
+
+/* worker thread (WT) processing loop */
+static int
+acl_wqp_loop(struct lcore_conf *qconf, uint32_t lcore_id)
+{
+ int32_t socketid;
+ uint32_t i, k, n;
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+ uint16_t hops[MAX_PKT_BURST];
+
+ socketid = rte_lcore_to_socket_id(lcore_id);
+
+ if (qconf->wqpool.nb_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "%s: lcore %u has nothing to do\n",
+ __func__, lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "%s: entering loop on lcore %u\n",
+ __func__, lcore_id);
+
+ while (!force_quit) {
+
+ /*
+ * Read packets from the internal queues and process them
+ */
+ for (i = 0; i < qconf->wqpool.nb_queue; ++i) {
+
+ n = lcore_wq_pull(&qconf->wqpool, i, pkts,
+ RTE_DIM(pkts));
+ if (n == 0)
+ continue;
+
+ acl_process_pkts(pkts, hops, n, socketid);
+ process_step3_burst(pkts, hops, n);
+ k = lcore_wq_push(&qconf->wqpool, i, pkts, hops, n);
+ if (n != k) {
+ /* stats update */
+ rte_pktmbuf_free_bulk(pkts + k, n - k);
}
}
}
return 0;
}
+/* main processing loop */
+int
+acl_main_loop(__rte_unused void *dummy)
+{
+ uint32_t lcore_id;
+ struct lcore_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ if (qconf->n_rx_queue != 0)
+ return acl_poll_loop(qconf, lcore_id);
+ else
+ return acl_wqp_loop(qconf, lcore_id);
+}
+
+#ifdef RTE_LIB_EVENTDEV
+#include "l3fwd_acl_event.h"
+#endif
+
/* Not used by L3fwd ACL. */
void *
acl_get_ipv4_l3fwd_lookup_struct(__rte_unused const int socketid)
new file mode 100644
@@ -0,0 +1,258 @@
+#include "l3fwd_event.h"
+
+/* One eventdev loop for both single and burst mode, using ACL lookups. */
+static __rte_always_inline void
+acl_event_loop(struct l3fwd_event_resources *evt_rsrc,
+ const uint8_t flags)
+{
+ uint32_t i, lcore_id, nb_deq, nb_enq;
+ int32_t socketid;
+ uint16_t hops[MAX_PKT_BURST];
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+ struct rte_event events[MAX_PKT_BURST];
+
+ const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
+ const uint8_t tx_q_id = evt_rsrc->evq.event_q_id[
+ evt_rsrc->evq.nb_queues - 1];
+ const uint8_t event_d_id = evt_rsrc->event_d_id;
+ const uint16_t deq_len = RTE_MIN(evt_rsrc->deq_depth, MAX_PKT_BURST);
+
+ if (event_p_id < 0)
+ return;
+
+ lcore_id = rte_lcore_id();
+ socketid = rte_lcore_to_socket_id(lcore_id);
+
+ RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
+
+ nb_deq = 0;
+ nb_enq = 0;
+
+ while (!force_quit) {
+ /* Read events from RX queues. */
+ nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id,
+ events, deq_len, 0);
+ if (nb_deq == 0) {
+ rte_pause();
+ continue;
+ }
+
+ for (i = 0; i != nb_deq; i++) {
+ pkts[i] = events[i].mbuf;
+ if (flags & L3FWD_EVENT_TX_ENQ) {
+ events[i].queue_id = tx_q_id;
+ events[i].op = RTE_EVENT_OP_FORWARD;
+ }
+ rte_event_eth_tx_adapter_txq_set(pkts[i], 0);
+ }
+
+ acl_process_pkts(pkts, hops, nb_deq, socketid);
+
+ for (i = 0; i != nb_deq; i++) {
+ process_packet(pkts[i], &hops[i]);
+ pkts[i]->port = (hops[i] != BAD_PORT) ?
+ hops[i] : pkts[i]->port;
+ }
+
+ if (flags & L3FWD_EVENT_TX_ENQ) {
+ nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
+ events, nb_deq);
+ while (nb_enq < nb_deq && !force_quit)
+ nb_enq += rte_event_enqueue_burst(event_d_id,
+ event_p_id, events + nb_enq,
+ nb_deq - nb_enq);
+ }
+
+ if (flags & L3FWD_EVENT_TX_DIRECT) {
+ nb_enq = rte_event_eth_tx_adapter_enqueue(event_d_id,
+ event_p_id, events, nb_deq, 0);
+ while (nb_enq < nb_deq && !force_quit)
+ nb_enq += rte_event_eth_tx_adapter_enqueue(
+ event_d_id, event_p_id,
+ events + nb_enq,
+ nb_deq - nb_enq, 0);
+ }
+ }
+
+ l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
+ nb_deq, 0);
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_d(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc =
+ l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_d_burst(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc =
+ l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_q(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc =
+ l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_q_burst(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc =
+ l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ);
+ return 0;
+}
+
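+/*
+ * Run ACL classification over an event vector in MAX_PKT_BURST sized
+ * chunks, then rewrite packet headers and resolve destination ports.
+ */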
+static __rte_always_inline void
+acl_process_event_vector(struct rte_event_vector *vec, uint16_t *hops,
+ int32_t socketid)
+{
+ uint32_t i, k;
+
+ for (i = 0; i != vec->nb_elem; i += k) {
+ k = RTE_MIN(vec->nb_elem - i, (uint32_t)MAX_PKT_BURST);
+ acl_process_pkts(vec->mbufs + i, hops + i, k, socketid);
+ }
+
+#if defined ACL_SEND_MULTI
+ k = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(&vec->mbufs[i], &hops[i]);
+ for (; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ process_packet(vec->mbufs[i], &hops[i]);
+#endif
+
+ process_event_vector(vec, hops);
+}
+
+static __rte_always_inline void
+acl_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
+ const uint8_t flags)
+{
+ uint16_t *hops;
+ int32_t socketid;
+ uint32_t i, lcore_id, nb_deq, nb_enq;
+ struct rte_event events[MAX_PKT_BURST];
+
+ const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
+ const uint8_t tx_q_id =
+ evt_rsrc->evq.event_q_id[evt_rsrc->evq.nb_queues - 1];
+ const uint8_t event_d_id = evt_rsrc->event_d_id;
+ const uint16_t deq_len = evt_rsrc->deq_depth;
+
+ if (event_p_id < 0)
+ return;
+
+ lcore_id = rte_lcore_id();
+ socketid = rte_lcore_to_socket_id(lcore_id);
+
+ hops = rte_zmalloc_socket(NULL, sizeof(hops[0]) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE, socketid);
+ if (hops == NULL) {
+ RTE_LOG(ERR, L3FWD,
+ "%s: failed to alloc internal buffers on lcore %u\n",
+ __func__, lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
+
+ nb_deq = 0;
+ nb_enq = 0;
+
+ while (!force_quit) {
+ /* Read events from RX queues. */
+ nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id, events,
+ deq_len, 0);
+ if (nb_deq == 0) {
+ rte_pause();
+ continue;
+ }
+
+ for (i = 0; i < nb_deq; i++) {
+ if (flags & L3FWD_EVENT_TX_ENQ) {
+ events[i].queue_id = tx_q_id;
+ events[i].op = RTE_EVENT_OP_FORWARD;
+ }
+
+ acl_process_event_vector(events[i].vec, hops, socketid);
+ }
+
+ if (flags & L3FWD_EVENT_TX_ENQ) {
+ nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
+ events, nb_deq);
+ while (nb_enq < nb_deq && !force_quit)
+ nb_enq += rte_event_enqueue_burst(
+ event_d_id, event_p_id, events + nb_enq,
+ nb_deq - nb_enq);
+ }
+
+ if (flags & L3FWD_EVENT_TX_DIRECT) {
+ nb_enq = rte_event_eth_tx_adapter_enqueue(
+ event_d_id, event_p_id, events, nb_deq, 0);
+ while (nb_enq < nb_deq && !force_quit)
+ nb_enq += rte_event_eth_tx_adapter_enqueue(
+ event_d_id, event_p_id, events + nb_enq,
+ nb_deq - nb_enq, 0);
+ }
+ }
+
+ l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
+ nb_deq, 1);
+ rte_free(hops);
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_d_vector(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_q_vector(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_ENQ);
+ return 0;
+}
+
+int __rte_noinline
+acl_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy)
+{
+ struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
+
+ acl_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_ENQ);
+ return 0;
+}
@@ -250,6 +250,16 @@ l3fwd_event_resource_setup(struct rte_eth_conf *port_conf)
[1][1][0] = fib_event_main_loop_tx_q_vector,
[1][1][1] = fib_event_main_loop_tx_q_burst_vector,
};
+ const event_loop_cb acl_event_loop[2][2][2] = {
+ [0][0][0] = acl_event_main_loop_tx_d,
+ [0][0][1] = acl_event_main_loop_tx_d_burst,
+ [0][1][0] = acl_event_main_loop_tx_q,
+ [0][1][1] = acl_event_main_loop_tx_q_burst,
+ [1][0][0] = acl_event_main_loop_tx_d_vector,
+ [1][0][1] = acl_event_main_loop_tx_d_burst_vector,
+ [1][1][0] = acl_event_main_loop_tx_q_vector,
+ [1][1][1] = acl_event_main_loop_tx_q_burst_vector,
+ };
uint32_t event_queue_cfg;
int ret;
@@ -293,6 +303,10 @@ l3fwd_event_resource_setup(struct rte_eth_conf *port_conf)
evt_rsrc->ops.fib_event_loop =
fib_event_loop[evt_rsrc->vector_enabled][evt_rsrc->tx_mode_q]
[evt_rsrc->has_burst];
+
+ evt_rsrc->ops.acl_event_loop =
+ acl_event_loop[evt_rsrc->vector_enabled][evt_rsrc->tx_mode_q]
+ [evt_rsrc->has_burst];
}
static void
@@ -58,6 +58,7 @@ struct l3fwd_event_setup_ops {
event_loop_cb lpm_event_loop;
event_loop_cb em_event_loop;
event_loop_cb fib_event_loop;
+ event_loop_cb acl_event_loop;
};
struct l3fwd_event_resources {
@@ -86,12 +86,35 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
_mm_storeu_si128((__m128i *)eth_hdr, te);
}
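+/*
+ * Apply the final header rewrite (step3) to a burst of packets:
+ * FWDSTEP packets at a time where possible, the remainder one by one.
+ */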
+static inline void
+process_step3_burst(struct rte_mbuf *pkt[], uint16_t dst_port[], uint32_t num)
+{
+ uint32_t i, k;
+
+ k = RTE_ALIGN_FLOOR(num, FWDSTEP);
+
+ for (i = 0; i != k; i += FWDSTEP)
+ processx4_step3(pkt + i, dst_port + i);
+
+ /* Process up to last 3 packets one by one. */
+ switch (num % FWDSTEP) {
+ case 3:
+ process_packet(pkt[i + 2], dst_port + i + 2);
+ /* fall-through */
+ case 2:
+ process_packet(pkt[i + 1], dst_port + i + 1);
+ /* fall-through */
+ case 1:
+ process_packet(pkt[i], dst_port + i);
+ }
+}
+
/**
* Send packets burst from pkts_burst to the ports in dst_port array
*/
static __rte_always_inline void
-send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
- uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+__send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+ uint16_t dst_port[MAX_PKT_BURST], int nb_rx, int step3)
{
int32_t k;
int j = 0;
@@ -110,13 +133,15 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
lp = pnum;
lp[0] = 1;
- processx4_step3(pkts_burst, dst_port);
+ if (step3 != 0)
+ processx4_step3(pkts_burst, dst_port);
/* dp1: <d[0], d[1], d[2], d[3], ... > */
dp1 = _mm_loadu_si128((__m128i *)dst_port);
for (j = FWDSTEP; j != k; j += FWDSTEP) {
- processx4_step3(&pkts_burst[j], &dst_port[j]);
+ if (step3 != 0)
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
/*
* dp2:
@@ -155,17 +180,20 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
/* Process up to last 3 packets one by one. */
switch (nb_rx % FWDSTEP) {
case 3:
- process_packet(pkts_burst[j], dst_port + j);
+ if (step3 != 0)
+ process_packet(pkts_burst[j], dst_port + j);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
j++;
/* fall-through */
case 2:
- process_packet(pkts_burst[j], dst_port + j);
+ if (step3 != 0)
+ process_packet(pkts_burst[j], dst_port + j);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
j++;
/* fall-through */
case 1:
- process_packet(pkts_burst[j], dst_port + j);
+ if (step3 != 0)
+ process_packet(pkts_burst[j], dst_port + j);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
j++;
}
@@ -194,6 +222,13 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+ uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+ __send_packets_multi(qconf, pkts_burst, dst_port, nb_rx, 1);
+}
+
static __rte_always_inline uint16_t
process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
{
new file mode 100644
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Huawei Technologies Co., Ltd
+ */
+
+#include "l3fwd.h"
+#include "l3fwd_wqp.h"
+
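+/*
+ * Allocate and initialize an rte_ring on the given socket.
+ * The ring is named after its own address to keep the name unique.
+ */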
+static int
+wqp_ring_init(struct rte_ring **r, uint32_t num, size_t sz, int32_t sid,
+ uint32_t flags)
+{
+ char name[RTE_RING_NAMESIZE];
+
+ *r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, sid);
+ if (*r == NULL)
+ return -ENOMEM;
+
+ snprintf(name, sizeof(name), "%p", *r);
+ return rte_ring_init(*r, name, num, flags);
+}
+
+static int
+wqp_soring_init(struct rte_soring **r, struct rte_soring_param *prm, size_t sz,
+ int32_t sid)
+{
+ char name[RTE_RING_NAMESIZE];
+
+ *r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, sid);
+ if (*r == NULL)
+ return -ENOMEM;
+
+ snprintf(name, sizeof(name), "%p", *r);
+ prm->name = name;
+ return rte_soring_init(*r, prm);
+}
+
+static void
+wqp_fini(struct lcore_conf *lc)
+{
+ uint32_t i, j;
+
+ if (lc->n_rx_queue == 0)
+ return;
+
+ for (i = 0; i != lc->wqpool.nb_queue; i++) {
+ for (j = 0; j != RTE_DIM(lc->wqpool.queue[i].r); j++)
+ rte_free(lc->wqpool.queue[i].r[j]);
+ }
+
+ memset(&lc->wqpool, 0, sizeof(lc->wqpool));
+}
+
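+/*
+ * UNQUE mode: each worker queue is a pair of rte_rings. The IN ring
+ * (created single-producer) carries raw mbufs from the I/O lcore to
+ * the workers, the OUT ring (created single-consumer) carries
+ * (mbuf, next-hop) elements back to the same I/O lcore.
+ */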
+static int
+l3fwd_wqp_unque_init(struct lcore_conf *lc, const struct l3fwd_wqp_param *prm,
+ uint32_t lcid)
+{
+ int32_t rc, sid;
+ uint32_t i, n, nq;
+ size_t szi, szo;
+
+ sid = rte_lcore_to_socket_id(lcid);
+
+ n = rte_align32pow2(prm->qsize);
+
+ szi = rte_ring_get_memsize(n);
+ szo = rte_ring_get_memsize_elem(sizeof(struct wqelm), n);
+
+ nq = (prm->single == 0) ? lc->n_rx_queue : 1;
+ lc->wqpool.nb_queue = nq;
+ lc->wqpool.qmask = (prm->single != 0) ? 0 : UINT32_MAX;
+
+ rc = 0;
+ for (i = 0; i != nq; i++) {
+
+ rc = wqp_ring_init(&lc->wqpool.queue[i].r[LCORE_WQ_IN], n, szi,
+ sid, RING_F_SP_ENQ);
+ if (rc != 0)
+ break;
+
+ rc = wqp_ring_init(&lc->wqpool.queue[i].r[LCORE_WQ_OUT], n, szo,
+ sid, RING_F_SC_DEQ);
+ if (rc != 0)
+ break;
+ }
+
+ if (i != nq) {
+ printf("error: %s failed at %u-th queue, error code: %d\n",
+ __func__, i, rc);
+ wqp_fini(lc);
+ }
+
+ lc->wqpool.prm = *prm;
+ return rc;
+}
+
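+/*
+ * ORQUE mode: each worker queue is a single-stage rte_soring with a
+ * 32-bit metadata slot for the next-hop value and single-threaded
+ * producer/consumer sync (one I/O lcore on both ends). It is sized to
+ * twice the requested qsize, presumably to leave headroom for elements
+ * held by the processing stage.
+ */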
+static int
+l3fwd_wqp_orque_init(struct lcore_conf *lc, const struct l3fwd_wqp_param *qprm,
+ uint32_t lcid)
+{
+ int32_t rc, sid;
+ uint32_t i, n, nq;
+ ssize_t sz;
+ struct rte_soring_param prm;
+
+ sid = rte_lcore_to_socket_id(lcid);
+
+ memset(&prm, 0, sizeof(prm));
+
+ n = 2 * qprm->qsize;
+ prm.elems = n;
+ prm.elem_size = sizeof(uintptr_t);
+ prm.meta_size = sizeof(uint32_t);
+ prm.stages = 1;
+ prm.prod_synt = RTE_RING_SYNC_ST;
+ prm.cons_synt = RTE_RING_SYNC_ST;
+
+ sz = rte_soring_get_memsize(&prm);
+ if (sz < 0)
+ return sz;
+
+ nq = (qprm->single == 0) ? lc->n_rx_queue : 1;
+ lc->wqpool.nb_queue = nq;
+ lc->wqpool.qmask = (qprm->single != 0) ? 0 : UINT32_MAX;
+
+ rc = 0;
+ for (i = 0; i != nq; i++) {
+
+ rc = wqp_soring_init(&lc->wqpool.queue[i].sor, &prm, sz, sid);
+ if (rc != 0)
+ break;
+ }
+
+ if (i != nq) {
+ printf("error: %s failed at %u-th queue, error code: %d\n",
+ __func__, i, rc);
+ wqp_fini(lc);
+ }
+
+ lc->wqpool.prm = *qprm;
+ return rc;
+}
+
+static int
+wqp_init(struct lcore_conf *lc, const struct l3fwd_wqp_param *prm,
+ uint32_t lcid)
+{
+ /* this is I/O poll lcore */
+ if (lc->n_rx_queue != 0) {
+ if (prm->mode == L3FWD_WORKER_UNQUE)
+ return l3fwd_wqp_unque_init(lc, prm, lcid);
+ else if (prm->mode == L3FWD_WORKER_ORQUE)
+ return l3fwd_wqp_orque_init(lc, prm, lcid);
+ else
+ return -ENOTSUP;
+ }
+
+ return -ENOTSUP;
+}
+
+void
+l3fwd_wqp_fini(struct lcore_conf lc[RTE_MAX_LCORE])
+{
+ uint32_t lcid;
+
+ for (lcid = 0; lcid != RTE_MAX_LCORE; lcid++)
+ wqp_fini(lc + lcid);
+}
+
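+/* Fill in defaults for parameters left unset; returns 0 on success. */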
+static int
+check_set_wqp_param(struct l3fwd_wqp_param *prm)
+{
+ uint32_t n;
+
+ if (prm->qsize == 0) {
+ n = RTE_MAX(nb_rxd, nb_txd);
+ n = n + n / 2;
+ prm->qsize = n;
+ }
+
+ return 0;
+}
+
+static void
+print_wqp_param(const struct l3fwd_wqp_param *prm)
+{
+ printf("%s(%p): mode=%d, qsize=%u, single=%d\n",
+ __func__, prm, prm->mode, prm->qsize, prm->single);
+}
+
+int
+l3fwd_wqp_init(struct lcore_conf lc[RTE_MAX_LCORE],
+ const struct l3fwd_wqp_param *qprm)
+{
+ int32_t rc;
+ uint32_t i, j, k, lcid, m, n, nrxq, nwqt;
+ union lcore_wq *wqp;
+ struct l3fwd_wqp_param prm;
+
+ if (qprm->mode == L3FWD_WORKER_POLL)
+ return 0;
+
+ prm = *qprm;
+ rc = check_set_wqp_param(&prm);
+ print_wqp_param(&prm);
+ if (rc < 0) {
+ printf("error: %s invalid paramer values\n", __func__);
+ return rc;
+ }
+
+ nrxq = 0;
+ nwqt = 0;
+ for (lcid = 0; lcid != RTE_MAX_LCORE; lcid++) {
+ if (rte_lcore_is_enabled(lcid) == 0)
+ continue;
+ if (lc[lcid].n_rx_queue != 0)
+ nrxq += (prm.single != 0) ? 1 : lc[lcid].n_rx_queue;
+ nwqt += (lc[lcid].n_rx_queue == 0);
+ }
+
+ printf("%s: total worker queues: %u, total WQ threads: %u\n",
+ __func__, nrxq, nwqt);
+ if (nrxq == 0)
+ return 0;
+
+ if (nrxq > nwqt * MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: %s not enough WQ threads to handle all RXQs\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ for (lcid = 0; lcid != RTE_MAX_LCORE; lcid++) {
+ if (rte_lcore_is_enabled(lcid) == 0 || lc[lcid].n_rx_queue == 0)
+ continue;
+ rc = wqp_init(lc + lcid, &prm, lcid);
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0)
+ return rc;
+
+ /* gather all worker queues into one temporary array */
+ wqp = malloc(sizeof(wqp[0]) * nrxq);
+ if (wqp == NULL) {
+ l3fwd_wqp_fini(lc);
+ return -ENOMEM;
+ }
+
+ n = 0;
+ for (lcid = 0; lcid != RTE_MAX_LCORE; lcid++) {
+ memcpy(wqp + n, lc[lcid].wqpool.queue,
+ lc[lcid].wqpool.nb_queue * sizeof(wqp[0]));
+ n += lc[lcid].wqpool.nb_queue;
+ }
+
+ /* distribute them across all worker (non-I/O) lcores, round-robin */
+ k = 0;
+ m = RTE_MIN(RTE_DIM(lc[0].wqpool.queue), n);
+ for (lcid = 0; lcid != RTE_MAX_LCORE; lcid++) {
+ if (rte_lcore_is_enabled(lcid) == 0 ||
+ lc[lcid].wqpool.nb_queue != 0)
+ continue;
+ j = k;
+ for (i = 0; i != m; i++) {
+ lc[lcid].wqpool.queue[i] = wqp[j];
+ j = (j + 1) % n;
+ }
+ lc[lcid].wqpool.nb_queue = i;
+ lc[lcid].wqpool.qmask = UINT32_MAX;
+ lc[lcid].wqpool.prm = prm;
+ k = j;
+ }
+
+ free(wqp);
+ return rc;
+}
new file mode 100644
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2024 Huawei Technologies Co., Ltd
+ */
+
+#ifndef L3FWD_WQP_H
+#define L3FWD_WQP_H
+
+#include <rte_soring.h>
+
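+/* Worker queue output element: a packet plus its resolved next hop. */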
+struct wqelm {
+ struct rte_mbuf *mb;
+ uint32_t rc;
+} __rte_packed;
+
+void l3fwd_wqp_fini(struct lcore_conf lc[RTE_MAX_LCORE]);
+int l3fwd_wqp_init(struct lcore_conf lc[RTE_MAX_LCORE],
+ const struct l3fwd_wqp_param *prm);
+
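+/*
+ * I/O lcore side: enqueue a burst of raw packets into the idx-th
+ * worker queue. Returns the number of packets actually enqueued.
+ */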
+static inline uint32_t
+lcore_wq_submit(const struct lcore_wq_pool *wqp, uint32_t idx,
+ struct rte_mbuf * const pkts[MAX_PKT_BURST], uint32_t num)
+{
+ idx &= wqp->qmask;
+
+ if (wqp->prm.mode == L3FWD_WORKER_UNQUE) {
+ struct rte_ring *r = wqp->queue[idx].r[LCORE_WQ_IN];
+ return rte_ring_enqueue_burst(r, (void * const *)pkts, num,
+ NULL);
+ } else if (wqp->prm.mode == L3FWD_WORKER_ORQUE) {
+ struct rte_soring *sor = wqp->queue[idx].sor;
+ return rte_soring_enqueue_burst(sor, pkts, num, NULL);
+ }
+
+ rte_errno = ENOTSUP;
+ return 0;
+}
+
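+/*
+ * I/O lcore side: collect processed packets and their next hops.
+ * For UNQUE the result travels as a wqelm, for ORQUE as 32-bit
+ * soring metadata.
+ */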
+static inline uint32_t
+lcore_wq_receive(const struct lcore_wq_pool *wqp, uint32_t idx,
+ struct rte_mbuf *pkts[MAX_PKT_BURST], uint16_t hops[MAX_PKT_BURST],
+ uint32_t num)
+{
+ uint32_t i, n;
+ uint32_t rcs[MAX_PKT_BURST];
+ struct wqelm elm[MAX_PKT_BURST];
+
+ idx &= wqp->qmask;
+
+ if (wqp->prm.mode == L3FWD_WORKER_UNQUE) {
+ struct rte_ring *r = wqp->queue[idx].r[LCORE_WQ_OUT];
+
+ n = rte_ring_dequeue_burst_elem(r, elm, sizeof(elm[0]), num,
+ NULL);
+ for (i = 0; i != n; i++) {
+ pkts[i] = elm[i].mb;
+ hops[i] = elm[i].rc;
+ }
+
+ return n;
+
+ } else if (wqp->prm.mode == L3FWD_WORKER_ORQUE) {
+ struct rte_soring *sor = wqp->queue[idx].sor;
+
+ n = rte_soring_dequeux_burst(sor, pkts, rcs, num, NULL);
+ for (i = 0; i != n; i++)
+ hops[i] = rcs[i];
+
+ return n;
+ }
+
+ rte_errno = ENOTSUP;
+ return 0;
+}
+
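+/*
+ * Worker side: take a burst of raw packets for processing.
+ * For ORQUE this acquires stage-0 slots and stores the flow token
+ * needed by the matching lcore_wq_push().
+ */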
+static inline uint32_t
+lcore_wq_pull(struct lcore_wq_pool *wqp, uint32_t idx,
+ struct rte_mbuf *pkts[MAX_PKT_BURST], uint32_t num)
+{
+ idx &= wqp->qmask;
+
+ if (wqp->prm.mode == L3FWD_WORKER_UNQUE) {
+ struct rte_ring *r = wqp->queue[idx].r[LCORE_WQ_IN];
+ return rte_ring_dequeue_burst(r, (void **)pkts, num, NULL);
+
+ } else if (wqp->prm.mode == L3FWD_WORKER_ORQUE) {
+ struct rte_soring *sor = wqp->queue[idx].sor;
+ return rte_soring_acquire_burst(sor, pkts, 0, num,
+ &wqp->queue[idx].ftoken, NULL);
+ }
+
+ rte_errno = ENOTSUP;
+ return 0;
+}
+
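+/*
+ * Worker side: return processed packets together with their next hops.
+ * For ORQUE the release makes the acquired slots available to the
+ * consumer in their original enqueue order.
+ */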
+static inline uint32_t
+lcore_wq_push(const struct lcore_wq_pool *wqp, uint32_t idx,
+ struct rte_mbuf * const pkts[MAX_PKT_BURST],
+ const uint16_t hops[MAX_PKT_BURST], uint32_t num)
+{
+ uint32_t i;
+ uint32_t rcs[MAX_PKT_BURST];
+ struct wqelm elm[MAX_PKT_BURST];
+
+ idx &= wqp->qmask;
+
+ if (wqp->prm.mode == L3FWD_WORKER_UNQUE) {
+ struct rte_ring *r = wqp->queue[idx].r[LCORE_WQ_OUT];
+
+ for (i = 0; i != num; i++) {
+ elm[i].mb = pkts[i];
+ elm[i].rc = hops[i];
+ }
+ return rte_ring_enqueue_burst_elem(r, elm, sizeof(elm[0]), num,
+ NULL);
+ } else if (wqp->prm.mode == L3FWD_WORKER_ORQUE) {
+ struct rte_soring *sor = wqp->queue[idx].sor;
+
+ for (i = 0; i != num; i++)
+ rcs[i] = hops[i];
+ rte_soring_releasx(sor, NULL, rcs, 0, num,
+ wqp->queue[idx].ftoken);
+ return num;
+ }
+
+ rte_errno = ENOTSUP;
+ return 0;
+}
+
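+/*
+ * Typical call flow, as a sketch (qidx below stands for the
+ * worker-queue index chosen by the caller):
+ *
+ * I/O (poll) lcore:
+ *   n = lcore_wq_submit(&qconf->wqpool, qidx, pkts, nb_rx);
+ *   ...
+ *   n = lcore_wq_receive(&qconf->wqpool, qidx, pkts, hops, MAX_PKT_BURST);
+ *
+ * worker lcore:
+ *   n = lcore_wq_pull(&qconf->wqpool, qidx, pkts, RTE_DIM(pkts));
+ *   ...ACL lookup and header rewrite...
+ *   k = lcore_wq_push(&qconf->wqpool, qidx, pkts, hops, n);
+ */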
+#endif /* L3FWD_WQP_H */
@@ -47,6 +47,7 @@
#include "l3fwd.h"
#include "l3fwd_event.h"
#include "l3fwd_route.h"
+#include "l3fwd_wqp.h"
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_LCORE
#define MAX_RX_QUEUE_PER_PORT 128
@@ -69,6 +70,10 @@ enum L3FWD_LOOKUP_MODE {
};
static enum L3FWD_LOOKUP_MODE lookup_mode;
+struct l3fwd_wqp_param l3fwd_wqp_param = {
+ .mode = L3FWD_WORKER_POLL,
+};
+
/* Global variables. */
static int numa_on = 1; /**< NUMA is enabled by default. */
static int parse_ptype; /**< Parse packet type using rx callback, and */
@@ -246,6 +251,8 @@ const struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
{{32, 1, 2, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 15},
};
+uint32_t l3fwd_lookup_iter_num = 1;
+
/*
* API's called during initialization to setup ACL/EM/LPM rules.
*/
@@ -453,6 +460,23 @@ print_usage(const char *prgname)
ACL_LEAD_CHAR, ROUTE_LEAD_CHAR, alg);
}
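+/* Parse an unsigned integer and check it lies in [min, max]. */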
+static int
+parse_uint_val(const char *str, uint32_t *val, uint32_t min, uint32_t max)
+{
+ char *end = NULL;
+ unsigned long v;
+
+ errno = 0;
+ v = strtoul(str, &end, 0);
+ if (errno != 0 || end == NULL || *end != '\0')
+ return -1;
+ if (v < min || v > max)
+ return -1;
+
+ *val = v;
+ return 0;
+}
+
static int
parse_max_pkt_len(const char *pktlen)
{
@@ -572,16 +596,35 @@ parse_eth_dest(const char *optarg)
}
static void
-parse_mode(const char *optarg __rte_unused)
+parse_mode(const char *optarg)
{
+ l3fwd_wqp_param.mode = L3FWD_WORKER_POLL;
+
#ifdef RTE_LIB_EVENTDEV
struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
+ evt_rsrc->enabled = false;
+
if (!strcmp(optarg, "poll"))
evt_rsrc->enabled = false;
else if (!strcmp(optarg, "eventdev"))
evt_rsrc->enabled = true;
+ else
#endif
+ if (strcmp(optarg, "poll") == 0) {
+ /* nothing more to do: wqp mode is already set to POLL above */
+ } else if (strcmp(optarg, "wqorder") == 0) {
+ l3fwd_wqp_param.mode = L3FWD_WORKER_ORQUE;
+ l3fwd_wqp_param.single = 0;
+ } else if (strcmp(optarg, "wqunorder") == 0) {
+ l3fwd_wqp_param.mode = L3FWD_WORKER_UNQUE;
+ l3fwd_wqp_param.single = 0;
+ } else if (strcmp(optarg, "wqorderS") == 0) {
+ l3fwd_wqp_param.mode = L3FWD_WORKER_ORQUE;
+ l3fwd_wqp_param.single = 1;
+ } else if (strcmp(optarg, "wqunorderS") == 0) {
+ l3fwd_wqp_param.mode = L3FWD_WORKER_UNQUE;
+ l3fwd_wqp_param.single = 1;
+ } else
+ rte_exit(EXIT_FAILURE, "unknown mode: %s\n", optarg);
}
static void
@@ -698,6 +741,8 @@ static const char short_options[] =
#define CMD_LINE_OPT_RULE_IPV4 "rule_ipv4"
#define CMD_LINE_OPT_RULE_IPV6 "rule_ipv6"
#define CMD_LINE_OPT_ALG "alg"
+#define CMD_LINE_OPT_WQSIZE "wqsize"
+#define CMD_LINE_OPT_LOOKUP_ITER "lookup-iter"
enum {
/* long options mapped to a short option */
@@ -726,7 +771,9 @@ enum {
CMD_LINE_OPT_LOOKUP_NUM,
CMD_LINE_OPT_ENABLE_VECTOR_NUM,
CMD_LINE_OPT_VECTOR_SIZE_NUM,
- CMD_LINE_OPT_VECTOR_TMO_NS_NUM
+ CMD_LINE_OPT_VECTOR_TMO_NS_NUM,
+ CMD_LINE_OPT_WQSIZE_NUM,
+ CMD_LINE_OPT_LOOKUP_ITER_NUM,
};
static const struct option lgopts[] = {
@@ -753,6 +800,8 @@ static const struct option lgopts[] = {
{CMD_LINE_OPT_RULE_IPV4, 1, 0, CMD_LINE_OPT_RULE_IPV4_NUM},
{CMD_LINE_OPT_RULE_IPV6, 1, 0, CMD_LINE_OPT_RULE_IPV6_NUM},
{CMD_LINE_OPT_ALG, 1, 0, CMD_LINE_OPT_ALG_NUM},
+ {CMD_LINE_OPT_WQSIZE, 1, 0, CMD_LINE_OPT_WQSIZE_NUM},
+ {CMD_LINE_OPT_LOOKUP_ITER, 1, 0, CMD_LINE_OPT_LOOKUP_ITER_NUM},
{NULL, 0, 0, 0}
};
@@ -934,6 +983,18 @@ parse_args(int argc, char **argv)
case CMD_LINE_OPT_ALG_NUM:
l3fwd_set_alg(optarg);
break;
+ case CMD_LINE_OPT_WQSIZE_NUM:
+ ret = parse_uint_val(optarg, &l3fwd_wqp_param.qsize,
+ RX_DESC_DEFAULT, UINT16_MAX);
+ if (ret < 0)
+ return ret;
+ break;
+ case CMD_LINE_OPT_LOOKUP_ITER_NUM:
+ ret = parse_uint_val(optarg, &l3fwd_lookup_iter_num,
+ 1, UINT16_MAX);
+ if (ret < 0)
+ return ret;
+ break;
default:
print_usage(prgname);
return -1;
@@ -1588,6 +1649,8 @@ main(int argc, char **argv)
l3fwd_lkp.main_loop = evt_rsrc->ops.em_event_loop;
else if (lookup_mode == L3FWD_LOOKUP_FIB)
l3fwd_lkp.main_loop = evt_rsrc->ops.fib_event_loop;
+ else if (lookup_mode == L3FWD_LOOKUP_ACL)
+ l3fwd_lkp.main_loop = evt_rsrc->ops.acl_event_loop;
else
l3fwd_lkp.main_loop = evt_rsrc->ops.lpm_event_loop;
} else
@@ -1640,6 +1703,12 @@ main(int argc, char **argv)
}
}
+ /* init worker queues for lcores (if any) */
+ ret = l3fwd_wqp_init(lcore_conf, &l3fwd_wqp_param);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "l3fwd_wqp_init: err=%d, lcore=%u\n",
+ ret, lcore_id);
+
check_all_ports_link_status(enabled_port_mask);
ret = 0;
@@ -1695,6 +1764,8 @@ main(int argc, char **argv)
/* clean up config file routes */
l3fwd_lkp.free_routes();
+ l3fwd_wqp_fini(lcore_conf);
+
/* clean up the EAL */
rte_eal_cleanup();
@@ -16,6 +16,7 @@ sources = files(
'l3fwd_event_generic.c',
'l3fwd_fib.c',
'l3fwd_lpm.c',
+ 'l3fwd_wqp.c',
'main.c',
)
if dpdk_conf.has('RTE_LIB_EVENTDEV')