[dpdk-dev,v2,6/7] rte_sched: eliminate floating point in calculating byte clock
Commit Message
From: Stephen Hemminger <shemming@brocade.com>
The old code was doing a floating-point divide for each rte_dequeue(),
which is very expensive. Change to using fixed-point scaled math instead.
This improved performance from 5 Gbit/sec to 10 Gbit/sec.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/librte_sched/rte_sched.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
@@ -102,6 +102,9 @@
#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
+/* For cycles_per_byte calculation */
+#define RTE_SCHED_TIME_SHIFT 20
+
struct rte_sched_subport {
/* Token bucket (TB) */
uint64_t tb_time; /* time of last update */
@@ -239,7 +242,7 @@ struct rte_sched_port {
uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */
uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */
uint64_t time; /* Current NIC TX time measured in bytes */
- double cycles_per_byte; /* CPU cycles per byte */
+ uint32_t cycles_per_byte; /* CPU cycles per byte (scaled) */
/* Scheduling loop detection */
uint32_t pipe_loop;
@@ -657,7 +660,9 @@ rte_sched_port_config(struct rte_sched_port_params *params)
port->time_cpu_cycles = rte_get_tsc_cycles();
port->time_cpu_bytes = 0;
port->time = 0;
- port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate);
+
+ port->cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
+ / params->rate;
/* Scheduling loop detection */
port->pipe_loop = RTE_SCHED_PIPE_INVALID;
@@ -2156,11 +2161,12 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
{
uint64_t cycles = rte_get_tsc_cycles();
uint64_t cycles_diff = cycles - port->time_cpu_cycles;
- double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+ uint64_t bytes_diff = (cycles_diff << RTE_SCHED_TIME_SHIFT)
+ / port->cycles_per_byte;
/* Advance port time */
port->time_cpu_cycles = cycles;
- port->time_cpu_bytes += (uint64_t) bytes_diff;
+ port->time_cpu_bytes += bytes_diff;
if (port->time < port->time_cpu_bytes) {
port->time = port->time_cpu_bytes;
}