[2/4] examples/qos_sched: remove TX buffering
Checks
Commit Message
Since the qos_sched app does batch dequeues from the QoS block, there is
little point in trying to batch further in the app - just send out the
full burst of packets that were received from the QoS block. With modern
CPUs and write-combining doorbells, the cost of doing smaller TX's is
reduced anyway for the worst case.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
examples/qos_sched/app_thread.c | 94 ++++-----------------------------
examples/qos_sched/main.c | 12 -----
examples/qos_sched/main.h | 6 ---
3 files changed, 9 insertions(+), 103 deletions(-)
Comments
> -----Original Message-----
> From: Richardson, Bruce <bruce.richardson@intel.com>
> Sent: Friday, February 3, 2023 10:06 AM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder <jasvinder.singh@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>
> Subject: [PATCH 2/4] examples/qos_sched: remove TX buffering
>
> Since the qos_sched app does batch dequeues from the QoS block, there is
> little point in trying to batch further in the app - just send out the
> full burst of packets that were received from the QoS block. With modern
> CPUs and write-combining doorbells, the cost of doing smaller TX's is
> reduced anyway for the worst case.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
> examples/qos_sched/app_thread.c | 94 ++++-----------------------------
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
@@ -104,82 +104,21 @@ app_rx_thread(struct thread_conf **confs)
}
}
-
-
-/* Send the packet to an output interface
- * For performance reason function returns number of packets dropped, not sent,
- * so 0 means that all packets were sent successfully
- */
-
-static inline void
-app_send_burst(struct thread_conf *qconf)
-{
- struct rte_mbuf **mbufs;
- uint32_t n, ret;
-
- mbufs = (struct rte_mbuf **)qconf->m_table;
- n = qconf->n_mbufs;
-
- do {
- ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
- /* we cannot drop the packets, so re-send */
- /* update number of packets to be sent */
- n -= ret;
- mbufs = (struct rte_mbuf **)&mbufs[ret];
- } while (n);
-}
-
-
-/* Send the packet to an output interface */
-static void
-app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
-{
- uint32_t i, len;
-
- len = qconf->n_mbufs;
- for(i = 0; i < nb_pkt; i++) {
- qconf->m_table[len] = mbufs[i];
- len++;
- /* enough pkts to be sent */
- if (unlikely(len == burst_conf.tx_burst)) {
- qconf->n_mbufs = len;
- app_send_burst(qconf);
- len = 0;
- }
- }
-
- qconf->n_mbufs = len;
-}
-
void
app_tx_thread(struct thread_conf **confs)
{
struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
struct thread_conf *conf;
int conf_idx = 0;
- int retval;
- const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+ int nb_pkts;
while ((conf = confs[conf_idx])) {
- retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
+ nb_pkts = rte_ring_sc_dequeue_burst(conf->tx_ring, (void **)mbufs,
burst_conf.qos_dequeue, NULL);
- if (likely(retval != 0)) {
- app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
-
- conf->counter = 0; /* reset empty read loop counter */
- }
-
- conf->counter++;
-
- /* drain ring and TX queues */
- if (unlikely(conf->counter > drain_tsc)) {
- /* now check is there any packets left to be transmitted */
- if (conf->n_mbufs != 0) {
- app_send_burst(conf);
-
- conf->n_mbufs = 0;
- }
- conf->counter = 0;
+ if (likely(nb_pkts != 0)) {
+ uint16_t nb_tx = rte_eth_tx_burst(conf->tx_port, 0, mbufs, nb_pkts);
+ if (nb_pkts != nb_tx)
+ rte_pktmbuf_free_bulk(&mbufs[nb_tx], nb_pkts - nb_tx);
}
conf_idx++;
@@ -230,7 +169,6 @@ app_mixed_thread(struct thread_conf **confs)
struct rte_mbuf *mbufs[burst_conf.ring_burst];
struct thread_conf *conf;
int conf_idx = 0;
- const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
while ((conf = confs[conf_idx])) {
uint32_t nb_pkt;
@@ -250,23 +188,9 @@ app_mixed_thread(struct thread_conf **confs)
nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
burst_conf.qos_dequeue);
if (likely(nb_pkt > 0)) {
- app_send_packets(conf, mbufs, nb_pkt);
-
- conf->counter = 0; /* reset empty read loop counter */
- }
-
- conf->counter++;
-
- /* drain ring and TX queues */
- if (unlikely(conf->counter > drain_tsc)) {
-
- /* now check is there any packets left to be transmitted */
- if (conf->n_mbufs != 0) {
- app_send_burst(conf);
-
- conf->n_mbufs = 0;
- }
- conf->counter = 0;
+ uint16_t nb_tx = rte_eth_tx_burst(conf->tx_port, 0, mbufs, nb_pkt);
+ if (nb_tx != nb_pkt)
+ rte_pktmbuf_free_bulk(&mbufs[nb_tx], nb_pkt - nb_tx);
}
conf_idx++;
@@ -105,12 +105,6 @@ app_main_loop(__rte_unused void *dummy)
}
else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
for (i = 0; i < wt_idx; i++) {
- wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
- * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
-
- if (wt_confs[i]->m_table == NULL)
- rte_panic("flow %u unable to allocate memory buffer\n", i);
-
RTE_LOG(INFO, APP,
"flow %u lcoreid %u sched+write port %u\n",
i, lcore_id, wt_confs[i]->tx_port);
@@ -120,12 +114,6 @@ app_main_loop(__rte_unused void *dummy)
}
else if (mode == APP_TX_MODE) {
for (i = 0; i < tx_idx; i++) {
- tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
- * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
-
- if (tx_confs[i]->m_table == NULL)
- rte_panic("flow %u unable to allocate memory buffer\n", i);
-
RTE_LOG(INFO, APP, "flow%u lcoreid%u write port%u\n",
i, lcore_id, tx_confs[i]->tx_port);
}
@@ -37,8 +37,6 @@ extern "C" {
#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */
-#define BURST_TX_DRAIN_US 100
-
#ifndef APP_MAX_LCORE
#if (RTE_MAX_LCORE > 64)
#define APP_MAX_LCORE 64
@@ -75,10 +73,6 @@ struct thread_stat
struct thread_conf
{
- uint32_t counter;
- uint32_t n_mbufs;
- struct rte_mbuf **m_table;
-
uint16_t rx_port;
uint16_t tx_port;
uint16_t rx_queue;