@@ -155,7 +155,7 @@ CONFIG_RTE_MAX_ETHPORTS=32
CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
CONFIG_RTE_LIBRTE_IEEE1588=n
CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
-CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
+CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=n
CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
#
@@ -978,7 +978,7 @@ CONFIG_RTE_LIBRTE_ACL_DEBUG=n
#
# Compile librte_power
#
-CONFIG_RTE_LIBRTE_POWER=n
+CONFIG_RTE_LIBRTE_POWER=y
CONFIG_RTE_LIBRTE_POWER_DEBUG=n
CONFIG_RTE_MAX_LCORE_FREQS=64
@@ -28,6 +28,7 @@ DEPDIRS-librte_ethdev := librte_net librte_eal librte_mempool librte_ring
DEPDIRS-librte_ethdev += librte_mbuf
DEPDIRS-librte_ethdev += librte_kvargs
DEPDIRS-librte_ethdev += librte_meter
+DEPDIRS-librte_ethdev += librte_power
DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += librte_bbdev
DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
@@ -11,7 +11,7 @@ LIB = librte_ethdev.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring
-LDLIBS += -lrte_mbuf -lrte_kvargs -lrte_meter -lrte_telemetry
+LDLIBS += -lrte_mbuf -lrte_kvargs -lrte_meter -lrte_telemetry -lrte_power
EXPORT_MAP := rte_ethdev_version.map
@@ -27,4 +27,4 @@ headers = files('rte_ethdev.h',
'rte_tm.h',
'rte_tm_driver.h')
-deps += ['net', 'kvargs', 'meter', 'telemetry']
+deps += ['net', 'kvargs', 'meter', 'telemetry', 'power']
@@ -16,6 +16,7 @@
#include <netinet/in.h>
#include <rte_byteorder.h>
+#include <rte_cpuflags.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
@@ -39,6 +40,7 @@
#include <rte_class.h>
#include <rte_ether.h>
#include <rte_telemetry.h>
+#include <rte_power.h>
#include "rte_ethdev_trace.h"
#include "rte_ethdev.h"
@@ -185,6 +187,100 @@ enum {
STAT_QMAP_RX
};
+
+/**
+ * RX callback implementing the UMWAIT power management mode.
+ *
+ * Counts consecutive empty polls of the queue. Once the count exceeds
+ * ETH_EMPTYPOLL_MAX, the driver is asked for the address of the next RX
+ * descriptor and rte_power_monitor() is called on it. Any non-empty burst
+ * resets the counter.
+ *
+ * @param port_id
+ *   Port the callback is attached to.
+ * @param qidx
+ *   RX queue index, used to select the empty poll counter.
+ * @param nb_rx
+ *   Number of packets the driver returned for this burst.
+ *
+ * @return
+ *   nb_rx, unchanged. The value returned by an RX callback is what
+ *   rte_eth_rx_burst() reports to the application, so it must not be
+ *   altered here.
+ */
+static uint16_t
+rte_ethdev_pmgmt_umait(uint16_t port_id, uint16_t qidx,
+		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+		uint16_t max_pkts __rte_unused, void *_ __rte_unused)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	if (dev->pwr_mgmt_state != RTE_ETH_DEV_POWER_MGMT_ENABLED)
+		return nb_rx;
+
+	if (unlikely(nb_rx == 0)) {
+		dev->empty_poll_stats[qidx].num++;
+		if (unlikely(dev->empty_poll_stats[qidx].num >
+				ETH_EMPTYPOLL_MAX) &&
+				dev->dev_ops->next_rx_desc != NULL) {
+			volatile void *target_addr;
+			uint64_t expected, mask;
+			int ret;
+
+			/*
+			 * get address of next descriptor in the RX
+			 * ring for this queue, as well as expected
+			 * value and a mask.
+			 */
+			ret = (*dev->dev_ops->next_rx_desc)
+				(dev->data->rx_queues[qidx],
+				 &target_addr, &expected, &mask);
+			if (ret == 0)
+				/* -1ULL is maximum value for TSC */
+				rte_power_monitor(target_addr,
+						expected, mask,
+						0, -1ULL);
+		}
+	} else {
+		dev->empty_poll_stats[qidx].num = 0;
+	}
+
+	return nb_rx;
+}
+
+/**
+ * RX callback implementing the PAUSE power management mode.
+ *
+ * Counts consecutive empty polls of the queue. Once the count exceeds
+ * ETH_EMPTYPOLL_MAX, every further empty poll issues RTE_ETH_PAUSE_NUM
+ * rte_pause() calls. Any non-empty burst resets the counter.
+ *
+ * @param port_id
+ *   Port the callback is attached to.
+ * @param qidx
+ *   RX queue index, used to select the empty poll counter.
+ * @param nb_rx
+ *   Number of packets the driver returned for this burst.
+ *
+ * @return
+ *   nb_rx, unchanged. The value returned by an RX callback is what
+ *   rte_eth_rx_burst() reports to the application, so it must not be
+ *   altered here.
+ */
+static uint16_t
+rte_ethdev_pmgmt_pause(uint16_t port_id, uint16_t qidx,
+		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+		uint16_t max_pkts __rte_unused, void *_ __rte_unused)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	int i;
+
+	if (dev->pwr_mgmt_state != RTE_ETH_DEV_POWER_MGMT_ENABLED)
+		return nb_rx;
+
+	if (unlikely(nb_rx == 0)) {
+		dev->empty_poll_stats[qidx].num++;
+		if (unlikely(dev->empty_poll_stats[qidx].num >
+				ETH_EMPTYPOLL_MAX)) {
+			for (i = 0; i < RTE_ETH_PAUSE_NUM; i++)
+				rte_pause();
+		}
+	} else {
+		dev->empty_poll_stats[qidx].num = 0;
+	}
+
+	return nb_rx;
+}
+
+/**
+ * RX callback implementing the frequency scaling power management mode.
+ *
+ * Counts consecutive empty polls of the queue. Once the count exceeds
+ * ETH_EMPTYPOLL_MAX, the calling lcore is set to its minimum frequency.
+ * Any non-empty burst resets the counter and sets the calling lcore back to
+ * its maximum frequency.
+ *
+ * @param port_id
+ *   Port the callback is attached to.
+ * @param qidx
+ *   RX queue index, used to select the empty poll counter.
+ * @param nb_rx
+ *   Number of packets the driver returned for this burst.
+ *
+ * @return
+ *   nb_rx, unchanged. The value returned by an RX callback is what
+ *   rte_eth_rx_burst() reports to the application, so it must not be
+ *   altered here.
+ */
+static uint16_t
+rte_ethdev_pmgmt_scalefreq(uint16_t port_id, uint16_t qidx,
+		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+		uint16_t max_pkts __rte_unused, void *_ __rte_unused)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	if (dev->pwr_mgmt_state != RTE_ETH_DEV_POWER_MGMT_ENABLED)
+		return nb_rx;
+
+	if (unlikely(nb_rx == 0)) {
+		dev->empty_poll_stats[qidx].num++;
+		if (unlikely(dev->empty_poll_stats[qidx].num >
+				ETH_EMPTYPOLL_MAX)) {
+			/* scale down freq */
+			rte_power_freq_min(rte_lcore_id());
+		}
+	} else {
+		dev->empty_poll_stats[qidx].num = 0;
+		/* scale up freq */
+		rte_power_freq_max(rte_lcore_id());
+	}
+
+	return nb_rx;
+}
+
int
rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
{
@@ -5113,6 +5209,108 @@ rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
return (*dev->dev_ops->pool_ops_supported)(dev, pool);
}
+/*
+ * Enable RX power management on a port.
+ *
+ * Validates the request, allocates zeroed per-queue empty poll counters,
+ * performs the mode specific initialisation and installs the matching RX
+ * callback on queue 0 of the port. Every failure path releases whatever was
+ * acquired before it, and the port is only marked as enabled once the
+ * callback is in place.
+ *
+ * Returns 0 on success, -EINVAL for an invalid port or mode, a port that is
+ * already enabled, or a failed power library / callback setup, -ENOTSUP when
+ * UMWAIT is requested but the CPU lacks WAITPKG or the driver does not
+ * provide next_rx_desc, and -ENOMEM when the counters cannot be allocated.
+ */
+int
+rte_eth_dev_power_mgmt_enable(unsigned int lcore_id,
+		uint16_t port_id,
+		enum rte_eth_dev_power_mgmt_cb_mode mode)
+{
+	uint16_t (*clb)(uint16_t, uint16_t, struct rte_mbuf **, uint16_t,
+			uint16_t, void *);
+	struct rte_eth_ep_stat *stats;
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	dev = &rte_eth_devices[port_id];
+
+	/* refuse a second enable before touching any live state */
+	if (dev->pwr_mgmt_state == RTE_ETH_DEV_POWER_MGMT_ENABLED)
+		return -EINVAL;
+
+	/* validate the mode up front so nothing needs unwinding on error */
+	switch (mode) {
+	case RTE_ETH_DEV_POWER_MGMT_CB_UMWAIT:
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
+			return -ENOTSUP;
+		/* the UMWAIT callback relies on this driver op */
+		if (dev->dev_ops->next_rx_desc == NULL)
+			return -ENOTSUP;
+		clb = rte_ethdev_pmgmt_umait;
+		break;
+	case RTE_ETH_DEV_POWER_MGMT_CB_SCALE:
+		clb = rte_ethdev_pmgmt_scalefreq;
+		break;
+	case RTE_ETH_DEV_POWER_MGMT_CB_PAUSE:
+		clb = rte_ethdev_pmgmt_pause;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* counters must start at zero, hence the zeroing allocator */
+	stats = rte_zmalloc_socket(NULL,
+			sizeof(*stats) * RTE_MAX_QUEUES_PER_PORT,
+			0, dev->data->numa_node);
+	if (stats == NULL)
+		return -ENOMEM;
+
+	/* init scale freq */
+	if (mode == RTE_ETH_DEV_POWER_MGMT_CB_SCALE &&
+			rte_power_init(lcore_id)) {
+		rte_free(stats);
+		return -EINVAL;
+	}
+
+	/*
+	 * Publish the counters before the callback is installed; the
+	 * callback stays a no-op until pwr_mgmt_state is set below.
+	 */
+	dev->empty_poll_stats = stats;
+	dev->cb_mode = mode;
+
+	dev->cur_pwr_cb = rte_eth_add_rx_callback(port_id, 0, clb, NULL);
+	if (dev->cur_pwr_cb == NULL) {
+		if (mode == RTE_ETH_DEV_POWER_MGMT_CB_SCALE)
+			rte_power_exit(lcore_id);
+		dev->empty_poll_stats = NULL;
+		rte_free(stats);
+		return -EINVAL;
+	}
+
+	dev->pwr_mgmt_state = RTE_ETH_DEV_POWER_MGMT_ENABLED;
+	return 0;
+}
+
+/*
+ * Disable RX power management on a port.
+ *
+ * Does nothing and returns 0 when power management is not enabled.
+ * Otherwise the port is marked disabled first (which turns the RX callback
+ * into a no-op), the callback is removed from queue 0, the mode specific
+ * state is torn down and only then are the empty poll counters released.
+ * Teardown always runs to completion so that a later call cannot free the
+ * counters twice; the first failure encountered is reported as -EINVAL.
+ *
+ * NOTE(review): a data path thread that is already executing the callback
+ * may still touch the counters while they are freed; callers presumably
+ * have to stop polling the queue first - confirm.
+ */
+int
+rte_eth_dev_power_mgmt_disable(unsigned int lcore_id,
+		uint16_t port_id)
+{
+	struct rte_eth_dev *dev;
+	int ret = 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	dev = &rte_eth_devices[port_id];
+
+	if (dev->pwr_mgmt_state != RTE_ETH_DEV_POWER_MGMT_ENABLED)
+		return 0;
+
+	/* from here on the callback no longer touches the counters */
+	dev->pwr_mgmt_state = RTE_ETH_DEV_POWER_MGMT_DISABLED;
+
+	if (rte_eth_remove_rx_callback(port_id, 0, dev->cur_pwr_cb) != 0)
+		ret = -EINVAL;
+	dev->cur_pwr_cb = NULL;
+
+	if (dev->cb_mode == RTE_ETH_DEV_POWER_MGMT_CB_SCALE) {
+		/* restore full speed before releasing the power library */
+		rte_power_freq_max(lcore_id);
+		if (rte_power_exit(lcore_id))
+			ret = -EINVAL;
+	}
+
+	/* rte_free ignores NULL so safe to call without checks */
+	rte_free(dev->empty_poll_stats);
+	dev->empty_poll_stats = NULL;
+
+	return ret;
+}
+
/**
* A set of values to describe the possible states of a switch domain.
*/
@@ -157,6 +157,7 @@ extern "C" {
#include <rte_common.h>
#include <rte_config.h>
#include <rte_ether.h>
+#include <rte_power_intrinsics.h>
#include "rte_ethdev_trace_fp.h"
#include "rte_dev_info.h"
@@ -775,6 +776,7 @@ rte_eth_rss_hf_refine(uint64_t rss_hf)
/** Maximum nb. of vlan per mirror rule */
#define ETH_MIRROR_MAX_VLANS 64
+#define ETH_EMPTYPOLL_MAX 512 /**< Empty poll number threshold */
#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */
#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */
#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */
@@ -1603,6 +1605,25 @@ enum rte_eth_dev_state {
RTE_ETH_DEV_REMOVED,
};
+#define RTE_ETH_PAUSE_NUM 64 /* Number of rte_pause() calls per empty poll once ETH_EMPTYPOLL_MAX is exceeded */
+/**
+ * Possible power management states of an ethdev port.
+ *
+ * Stored in the pwr_mgmt_state field of struct rte_eth_dev; set by
+ * rte_eth_dev_power_mgmt_enable() and rte_eth_dev_power_mgmt_disable().
+ */
+enum rte_eth_dev_power_mgmt_state {
+ /** Device power management is disabled. */
+ RTE_ETH_DEV_POWER_MGMT_DISABLED = 0,
+ /** Device power management is enabled. */
+ RTE_ETH_DEV_POWER_MGMT_ENABLED,
+};
+
+enum rte_eth_dev_power_mgmt_cb_mode {
+ /** Wait on the next RX descriptor with rte_power_monitor(); requires the WAITPKG CPU flag. */
+ RTE_ETH_DEV_POWER_MGMT_CB_UMWAIT = 0,
+ /** Issue RTE_ETH_PAUSE_NUM rte_pause() calls while the queue stays empty. */
+ RTE_ETH_DEV_POWER_MGMT_CB_PAUSE,
+ RTE_ETH_DEV_POWER_MGMT_CB_SCALE, /**< Scale the lcore frequency down while the queue stays empty and back up on traffic. */
+};
+
struct rte_eth_dev_sriov {
uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */
uint8_t nb_q_per_pool; /**< rx queue number per pool */
@@ -4415,6 +4436,40 @@ __rte_experimental
int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
struct rte_eth_hairpin_cap *cap);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Enable device power management.
+ *
+ * Installs an RX callback on queue 0 of the port which saves power, in the
+ * way selected by @p mode, once the queue has been polled empty more than
+ * ETH_EMPTYPOLL_MAX times in a row.
+ *
+ * @param lcore_id
+ * The lcore passed to rte_power_init() when @p mode is
+ * RTE_ETH_DEV_POWER_MGMT_CB_SCALE; not used by the other modes.
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param mode
+ * The power saving scheme to apply, see enum rte_eth_dev_power_mgmt_cb_mode.
+ *
+ * @return
+ * 0 on success
+ * <0 on error, e.g. -EINVAL for an invalid port or a port that already has
+ * power management enabled, -ENOMEM when the empty poll counters cannot be
+ * allocated, -ENOTSUP when the requested mode is not supported
+ */
+__rte_experimental
+int rte_eth_dev_power_mgmt_enable(unsigned int lcore_id,
+ uint16_t port_id,
+ enum rte_eth_dev_power_mgmt_cb_mode mode);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Disable device power management.
+ *
+ * Removes the power management RX callback from queue 0 of the port and
+ * releases the empty poll counters. Calling this on a port that does not
+ * have power management enabled is a no-op that returns 0.
+ *
+ * @param lcore_id
+ * The lcore whose frequency is set back to maximum and for which
+ * rte_power_exit() is called when the port was enabled in
+ * RTE_ETH_DEV_POWER_MGMT_CB_SCALE mode; not used by the other modes.
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * 0 on success
+ * <0 on error
+ */
+__rte_experimental
+int rte_eth_dev_power_mgmt_disable(unsigned int lcore_id, uint16_t port_id);
+
#include <rte_ethdev_core.h>
/**
@@ -4535,6 +4590,7 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
return nb_rx;
}
+
/**
* Get the number of used descriptors of a rx queue
*
@@ -4993,6 +5049,9 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
}
+
+
+
#ifdef __cplusplus
}
#endif
@@ -603,6 +603,27 @@ typedef int (*eth_tx_hairpin_queue_setup_t)
uint16_t nb_tx_desc,
const struct rte_eth_hairpin_conf *hairpin_conf);
+/**
+ * @internal
+ * Get the next RX ring descriptor address.
+ *
+ * The three output values are handed to rte_power_monitor() by the ethdev
+ * UMWAIT power management callback.
+ *
+ * @param rxq
+ * ethdev queue pointer.
+ * @param tail_desc_addr
+ * Output: set to the address of the next RX descriptor.
+ * @param expected
+ * Output: value passed as the "expected" argument of rte_power_monitor().
+ * NOTE(review): exact comparison semantics are defined by
+ * rte_power_monitor() - confirm.
+ * @param mask
+ * Output: mask passed alongside @p expected to rte_power_monitor().
+ *
+ * @return
+ * Negative errno value on error, 0 on success.
+ *
+ * @retval 0
+ * Success.
+ * @retval -EINVAL
+ * Failed to get descriptor address.
+ */
+typedef int (*eth_next_rx_desc_t)
+ (void *rxq, volatile void **tail_desc_addr,
+ uint64_t *expected, uint64_t *mask);
+
/**
* @internal A structure containing the functions exported by an Ethernet driver.
*/
@@ -752,6 +773,8 @@ struct eth_dev_ops {
/**< Set up device RX hairpin queue. */
eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
/**< Set up device TX hairpin queue. */
+ eth_next_rx_desc_t next_rx_desc;
+ /**< Get next RX ring descriptor address. */
};
/**
@@ -768,6 +791,14 @@ struct rte_eth_rxtx_callback {
void *param;
};
+/**
+ * @internal
+ * Structure used to hold counters for empty poll.
+ *
+ * One instance per RX queue is allocated when power management is enabled
+ * on a port.
+ */
+struct rte_eth_ep_stat {
+ uint64_t num; /**< Consecutive empty polls; reset on a non-empty burst. */
+} __rte_cache_aligned;
+
/**
* @internal
* The generic data structure associated with each ethernet device.
@@ -807,8 +838,16 @@ struct rte_eth_dev {
enum rte_eth_dev_state state; /**< Flag indicating the port state */
void *security_ctx; /**< Context for security ops */
- uint64_t reserved_64s[4]; /**< Reserved for future fields */
- void *reserved_ptrs[4]; /**< Reserved for future fields */
+ /** Power management state of the port and the callback mode in use */
+ enum rte_eth_dev_power_mgmt_state pwr_mgmt_state;
+ enum rte_eth_dev_power_mgmt_cb_mode cb_mode;
+ uint32_t reserved_32;
+ uint64_t reserved_64s[3]; /**< Reserved for future fields */
+
+ /** Per-queue empty poll counters and the installed power management RX callback */
+ struct rte_eth_ep_stat *empty_poll_stats;
+ const struct rte_eth_rxtx_callback *cur_pwr_cb;
+ void *reserved_ptrs[3]; /**< Reserved for future fields */
} __rte_cache_aligned;
struct rte_eth_dev_sriov;
@@ -241,6 +241,10 @@ EXPERIMENTAL {
__rte_ethdev_trace_rx_burst;
__rte_ethdev_trace_tx_burst;
rte_flow_get_aged_flows;
+
+ # added in 20.08
+ rte_eth_dev_power_mgmt_disable;
+ rte_eth_dev_power_mgmt_enable;
};
INTERNAL {
@@ -14,17 +14,18 @@ libraries = [
'eal', # everything depends on eal
'ring',
'rcu', # rcu depends on ring
+ 'timer', # eventdev depends on this
+ 'power', # ethdev depends on this
'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core
'cmdline',
'metrics', # bitrate/latency stats depends on this
'hash', # efd depends on this
- 'timer', # eventdev depends on this
'acl', 'bbdev', 'bitratestats', 'cfgfile',
'compressdev', 'cryptodev',
'distributor', 'efd', 'eventdev',
'gro', 'gso', 'ip_frag', 'jobstats',
'kni', 'latencystats', 'lpm', 'member',
- 'power', 'pdump', 'rawdev', 'regexdev',
+ 'pdump', 'rawdev', 'regexdev',
'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
# ipsec lib depends on net, crypto and security
'ipsec',
@@ -58,7 +58,6 @@ endif
_LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS) += --no-whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE) += -lrte_bitratestats
_LDLIBS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += -lrte_latencystats
-_LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power
_LDLIBS-$(CONFIG_RTE_LIBRTE_EFD) += -lrte_efd
_LDLIBS-$(CONFIG_RTE_LIBRTE_BPF) += -lrte_bpf
@@ -80,6 +79,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_KVARGS) += -lrte_kvargs
_LDLIBS-y += -lrte_telemetry
_LDLIBS-$(CONFIG_RTE_LIBRTE_MBUF) += -lrte_mbuf
_LDLIBS-$(CONFIG_RTE_LIBRTE_NET) += -lrte_net
+_LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power
_LDLIBS-$(CONFIG_RTE_LIBRTE_ETHER) += -lrte_ethdev
_LDLIBS-$(CONFIG_RTE_LIBRTE_BBDEV) += -lrte_bbdev
_LDLIBS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += -lrte_cryptodev