[dpdk-dev,v2,07/10] librte_power common interface for Guest and Host

Message ID 1411579576-21786-8-git-send-email-alan.carew@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Alan Carew Sept. 24, 2014, 5:26 p.m. UTC
Moved the current librte_power implementation to rte_power_acpi_cpufreq, with
renaming of functions only.
Added rte_power_kvm_vm implementation to support Power Management from a VM.

librte_power now hides the implementation based on the environment used.
A new call rte_power_set_env() can explicitly set the environment; if it is
not called, auto-detection takes place.

rte_power_kvm_vm implements a subset of the librte_power APIs; the following are supported:
 rte_power_init(unsigned lcore_id)
 rte_power_exit(unsigned lcore_id)
 rte_power_freq_up(unsigned lcore_id)
 rte_power_freq_down(unsigned lcore_id)
 rte_power_freq_min(unsigned lcore_id)
 rte_power_freq_max(unsigned lcore_id)

All other APIs are unsupported and return -ENOTSUP.

Signed-off-by: Alan Carew <alan.carew@intel.com>
---
 lib/librte_power/rte_power.c              | 540 ++++-------------------------
 lib/librte_power/rte_power.h              | 120 +++++--
 lib/librte_power/rte_power_acpi_cpufreq.c | 545 ++++++++++++++++++++++++++++++
 lib/librte_power/rte_power_acpi_cpufreq.h | 192 +++++++++++
 lib/librte_power/rte_power_common.h       |  39 +++
 lib/librte_power/rte_power_kvm_vm.c       | 160 +++++++++
 lib/librte_power/rte_power_kvm_vm.h       | 179 ++++++++++
 7 files changed, 1273 insertions(+), 502 deletions(-)
 create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c
 create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h
 create mode 100644 lib/librte_power/rte_power_common.h
 create mode 100644 lib/librte_power/rte_power_kvm_vm.c
 create mode 100644 lib/librte_power/rte_power_kvm_vm.h
  

Comments

Neil Horman Sept. 25, 2014, 10:10 a.m. UTC | #1
On Wed, Sep 24, 2014 at 06:26:13PM +0100, Alan Carew wrote:
> Moved the current librte_power implementation to rte_power_acpi_cpufreq, with
> renaming of functions only.
> Added rte_power_kvm_vm implmentation to support Power Management from a VM.
> 
> librte_power now hides the implementation based on the environment used.
> A new call rte_power_set_env() can explicidly set the environment, if not
> called then auto-detection takes place.
> 
> rte_power_kvm_vm is subset of the librte_power APIs, the following is supported:
>  rte_power_init(unsigned lcore_id)
>  rte_power_exit(unsigned lcore_id)
>  rte_power_freq_up(unsigned lcore_id)
>  rte_power_freq_down(unsigned lcore_id)
>  rte_power_freq_min(unsigned lcore_id)
>  rte_power_freq_max(unsigned lcore_id)
> 
> The other unsupported APIs return -ENOTSUP
> 
> Signed-off-by: Alan Carew <alan.carew@intel.com>
> ---
>  lib/librte_power/rte_power.c              | 540 ++++-------------------------
>  lib/librte_power/rte_power.h              | 120 +++++--
>  lib/librte_power/rte_power_acpi_cpufreq.c | 545 ++++++++++++++++++++++++++++++
>  lib/librte_power/rte_power_acpi_cpufreq.h | 192 +++++++++++
>  lib/librte_power/rte_power_common.h       |  39 +++
>  lib/librte_power/rte_power_kvm_vm.c       | 160 +++++++++
>  lib/librte_power/rte_power_kvm_vm.h       | 179 ++++++++++
>  7 files changed, 1273 insertions(+), 502 deletions(-)
>  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c
>  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h
>  create mode 100644 lib/librte_power/rte_power_common.h
>  create mode 100644 lib/librte_power/rte_power_kvm_vm.c
>  create mode 100644 lib/librte_power/rte_power_kvm_vm.h
> 
> diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
> index 856da9a..998ed1c 100644
> --- a/lib/librte_power/rte_power.c
> +++ b/lib/librte_power/rte_power.c
> @@ -31,515 +31,113 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>   */
>  
> -#include <stdio.h>
> -#include <sys/types.h>
> -#include <sys/stat.h>
> -#include <fcntl.h>
> -#include <stdlib.h>
> -#include <string.h>
> -#include <unistd.h>
> -#include <signal.h>
> -#include <limits.h>
> -
> -#include <rte_memcpy.h>
>  #include <rte_atomic.h>
>  
>  #include "rte_power.h"
> +#include "rte_power_acpi_cpufreq.h"
> +#include "rte_power_kvm_vm.h"
> +#include "rte_power_common.h"
>  
> -#ifdef RTE_LIBRTE_POWER_DEBUG
> -#define POWER_DEBUG_TRACE(fmt, args...) do { \
> -		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
> -	} while (0)
> -#else
> -#define POWER_DEBUG_TRACE(fmt, args...)
> -#endif
> -
> -#define FOPEN_OR_ERR_RET(f, retval) do { \
> -	if ((f) == NULL) { \
> -		RTE_LOG(ERR, POWER, "File not openned\n"); \
> -		return (retval); \
> -	} \
> -} while(0)
> -
> -#define FOPS_OR_NULL_GOTO(ret, label) do { \
> -	if ((ret) == NULL) { \
> -		RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
> -		goto label; \
> -	} \
> -} while(0)
> -
> -#define FOPS_OR_ERR_GOTO(ret, label) do { \
> -	if ((ret) < 0) { \
> -		RTE_LOG(ERR, POWER, "File operations failed\n"); \
> -		goto label; \
> -	} \
> -} while(0)
> -
> -#define STR_SIZE     1024
> -#define POWER_CONVERT_TO_DECIMAL 10
> +enum power_management_env global_default_env = PM_ENV_NOT_SET;
>  
> -#define POWER_GOVERNOR_USERSPACE "userspace"
> -#define POWER_SYSFILE_GOVERNOR   \
> -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
> -#define POWER_SYSFILE_AVAIL_FREQ \
> -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
> -#define POWER_SYSFILE_SETSPEED   \
> -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
> +volatile uint32_t global_env_cfg_status = 0;
>  
> -enum power_state {
> -	POWER_IDLE = 0,
> -	POWER_ONGOING,
> -	POWER_USED,
> -	POWER_UNKNOWN
> -};
> +/* function pointers */
> +rte_power_freqs_t rte_power_freqs  = NULL;
> +rte_power_get_freq_t rte_power_get_freq = NULL;
> +rte_power_set_freq_t rte_power_set_freq = NULL;
> +rte_power_freq_change_t rte_power_freq_up = NULL;
> +rte_power_freq_change_t rte_power_freq_down = NULL;
> +rte_power_freq_change_t rte_power_freq_max = NULL;
> +rte_power_freq_change_t rte_power_freq_min = NULL;
>  
> -/**
> - * Power info per lcore.
> - */
> -struct rte_power_info {
> -	unsigned lcore_id;                   /**< Logical core id */
> -	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
> -	uint32_t nb_freqs;                   /**< number of available freqs */
> -	FILE *f;                             /**< FD of scaling_setspeed */
> -	char governor_ori[32];               /**< Original governor name */
> -	uint32_t curr_idx;                   /**< Freq index in freqs array */
> -	volatile uint32_t state;             /**< Power in use state */
> -} __rte_cache_aligned;
> -
> -static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
> -
> -/**
> - * It is to set specific freq for specific logical core, according to the index
> - * of supported frequencies.
> - */
> -static int
> -set_freq_internal(struct rte_power_info *pi, uint32_t idx)
> +int
> +rte_power_set_env(enum power_management_env env)
>  {
> -	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
> -		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
> -			"should be less than %u\n", idx, pi->nb_freqs);
> -		return -1;
> -	}
> -
> -	/* Check if it is the same as current */
> -	if (idx == pi->curr_idx)
> +	if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
>  		return 0;
> -
1 Nit here.  If an invalid environment value is passed in on the first config
attempt here, you won't ever be able to set it.  Maybe add some logic to return
us to an initial state if a valid env isn't selected?

Neil
  
Alan Carew Sept. 25, 2014, 5:06 p.m. UTC | #2
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Thursday, September 25, 2014 11:10 AM
> To: Carew, Alan
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2 07/10] librte_power common interface for Guest and
> Host
> 
> On Wed, Sep 24, 2014 at 06:26:13PM +0100, Alan Carew wrote:
> > Moved the current librte_power implementation to rte_power_acpi_cpufreq,
> with
> > renaming of functions only.
> > Added rte_power_kvm_vm implmentation to support Power Management
> from a VM.
> >
> > librte_power now hides the implementation based on the environment used.
> > A new call rte_power_set_env() can explicidly set the environment, if not
> > called then auto-detection takes place.
> >
> > rte_power_kvm_vm is subset of the librte_power APIs, the following is
> supported:
> >  rte_power_init(unsigned lcore_id)
> >  rte_power_exit(unsigned lcore_id)
> >  rte_power_freq_up(unsigned lcore_id)
> >  rte_power_freq_down(unsigned lcore_id)
> >  rte_power_freq_min(unsigned lcore_id)
> >  rte_power_freq_max(unsigned lcore_id)
> >
> > The other unsupported APIs return -ENOTSUP
> >
> > Signed-off-by: Alan Carew <alan.carew@intel.com>
> > ---
> >  lib/librte_power/rte_power.c              | 540 ++++-------------------------
> >  lib/librte_power/rte_power.h              | 120 +++++--
> >  lib/librte_power/rte_power_acpi_cpufreq.c | 545
> ++++++++++++++++++++++++++++++
> >  lib/librte_power/rte_power_acpi_cpufreq.h | 192 +++++++++++
> >  lib/librte_power/rte_power_common.h       |  39 +++
> >  lib/librte_power/rte_power_kvm_vm.c       | 160 +++++++++
> >  lib/librte_power/rte_power_kvm_vm.h       | 179 ++++++++++
> >  7 files changed, 1273 insertions(+), 502 deletions(-)
> >  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c
> >  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h
> >  create mode 100644 lib/librte_power/rte_power_common.h
> >  create mode 100644 lib/librte_power/rte_power_kvm_vm.c
> >  create mode 100644 lib/librte_power/rte_power_kvm_vm.h
> >
> > diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
> > index 856da9a..998ed1c 100644
> > --- a/lib/librte_power/rte_power.c
> > +++ b/lib/librte_power/rte_power.c
> > @@ -31,515 +31,113 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> >   */
> >
> > -#include <stdio.h>
> > -#include <sys/types.h>
> > -#include <sys/stat.h>
> > -#include <fcntl.h>
> > -#include <stdlib.h>
> > -#include <string.h>
> > -#include <unistd.h>
> > -#include <signal.h>
> > -#include <limits.h>
> > -
> > -#include <rte_memcpy.h>
> >  #include <rte_atomic.h>
> >
> >  #include "rte_power.h"
> > +#include "rte_power_acpi_cpufreq.h"
> > +#include "rte_power_kvm_vm.h"
> > +#include "rte_power_common.h"
> >
> > -#ifdef RTE_LIBRTE_POWER_DEBUG
> > -#define POWER_DEBUG_TRACE(fmt, args...) do { \
> > -		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
> > -	} while (0)
> > -#else
> > -#define POWER_DEBUG_TRACE(fmt, args...)
> > -#endif
> > -
> > -#define FOPEN_OR_ERR_RET(f, retval) do { \
> > -	if ((f) == NULL) { \
> > -		RTE_LOG(ERR, POWER, "File not openned\n"); \
> > -		return (retval); \
> > -	} \
> > -} while(0)
> > -
> > -#define FOPS_OR_NULL_GOTO(ret, label) do { \
> > -	if ((ret) == NULL) { \
> > -		RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
> > -		goto label; \
> > -	} \
> > -} while(0)
> > -
> > -#define FOPS_OR_ERR_GOTO(ret, label) do { \
> > -	if ((ret) < 0) { \
> > -		RTE_LOG(ERR, POWER, "File operations failed\n"); \
> > -		goto label; \
> > -	} \
> > -} while(0)
> > -
> > -#define STR_SIZE     1024
> > -#define POWER_CONVERT_TO_DECIMAL 10
> > +enum power_management_env global_default_env = PM_ENV_NOT_SET;
> >
> > -#define POWER_GOVERNOR_USERSPACE "userspace"
> > -#define POWER_SYSFILE_GOVERNOR   \
> > -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
> > -#define POWER_SYSFILE_AVAIL_FREQ \
> > -
> 	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencie
> s"
> > -#define POWER_SYSFILE_SETSPEED   \
> > -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
> > +volatile uint32_t global_env_cfg_status = 0;
> >
> > -enum power_state {
> > -	POWER_IDLE = 0,
> > -	POWER_ONGOING,
> > -	POWER_USED,
> > -	POWER_UNKNOWN
> > -};
> > +/* function pointers */
> > +rte_power_freqs_t rte_power_freqs  = NULL;
> > +rte_power_get_freq_t rte_power_get_freq = NULL;
> > +rte_power_set_freq_t rte_power_set_freq = NULL;
> > +rte_power_freq_change_t rte_power_freq_up = NULL;
> > +rte_power_freq_change_t rte_power_freq_down = NULL;
> > +rte_power_freq_change_t rte_power_freq_max = NULL;
> > +rte_power_freq_change_t rte_power_freq_min = NULL;
> >
> > -/**
> > - * Power info per lcore.
> > - */
> > -struct rte_power_info {
> > -	unsigned lcore_id;                   /**< Logical core id */
> > -	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
> > -	uint32_t nb_freqs;                   /**< number of available freqs */
> > -	FILE *f;                             /**< FD of scaling_setspeed */
> > -	char governor_ori[32];               /**< Original governor name */
> > -	uint32_t curr_idx;                   /**< Freq index in freqs array */
> > -	volatile uint32_t state;             /**< Power in use state */
> > -} __rte_cache_aligned;
> > -
> > -static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
> > -
> > -/**
> > - * It is to set specific freq for specific logical core, according to the index
> > - * of supported frequencies.
> > - */
> > -static int
> > -set_freq_internal(struct rte_power_info *pi, uint32_t idx)
> > +int
> > +rte_power_set_env(enum power_management_env env)
> >  {
> > -	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
> > -		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
> > -			"should be less than %u\n", idx, pi->nb_freqs);
> > -		return -1;
> > -	}
> > -
> > -	/* Check if it is the same as current */
> > -	if (idx == pi->curr_idx)
> > +	if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
> >  		return 0;
> > -
> 1 Nit here.  If an invalid environment value is passed in on the first config
> attempt here, you won't ever be able to set it.  Maybe add some logic to return
> us to an initial state if a value env isn't selected?
> 
> Neil

Hi Neil,

I should have called it out in the commit, but there's also a rte_power_unset_env()
function that resets the environment that allows for retrying a different environment.
rte_power_unset_env() is also called when an invalid configuration is set.

Thanks,
Alan.
  
Neil Horman Sept. 25, 2014, 5:49 p.m. UTC | #3
On Thu, Sep 25, 2014 at 05:06:11PM +0000, Carew, Alan wrote:
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Thursday, September 25, 2014 11:10 AM
> > To: Carew, Alan
> > Cc: dev@dpdk.org
> > Subject: Re: [PATCH v2 07/10] librte_power common interface for Guest and
> > Host
> > 
> > On Wed, Sep 24, 2014 at 06:26:13PM +0100, Alan Carew wrote:
> > > Moved the current librte_power implementation to rte_power_acpi_cpufreq,
> > with
> > > renaming of functions only.
> > > Added rte_power_kvm_vm implmentation to support Power Management
> > from a VM.
> > >
> > > librte_power now hides the implementation based on the environment used.
> > > A new call rte_power_set_env() can explicidly set the environment, if not
> > > called then auto-detection takes place.
> > >
> > > rte_power_kvm_vm is subset of the librte_power APIs, the following is
> > supported:
> > >  rte_power_init(unsigned lcore_id)
> > >  rte_power_exit(unsigned lcore_id)
> > >  rte_power_freq_up(unsigned lcore_id)
> > >  rte_power_freq_down(unsigned lcore_id)
> > >  rte_power_freq_min(unsigned lcore_id)
> > >  rte_power_freq_max(unsigned lcore_id)
> > >
> > > The other unsupported APIs return -ENOTSUP
> > >
> > > Signed-off-by: Alan Carew <alan.carew@intel.com>
> > > ---
> > >  lib/librte_power/rte_power.c              | 540 ++++-------------------------
> > >  lib/librte_power/rte_power.h              | 120 +++++--
> > >  lib/librte_power/rte_power_acpi_cpufreq.c | 545
> > ++++++++++++++++++++++++++++++
> > >  lib/librte_power/rte_power_acpi_cpufreq.h | 192 +++++++++++
> > >  lib/librte_power/rte_power_common.h       |  39 +++
> > >  lib/librte_power/rte_power_kvm_vm.c       | 160 +++++++++
> > >  lib/librte_power/rte_power_kvm_vm.h       | 179 ++++++++++
> > >  7 files changed, 1273 insertions(+), 502 deletions(-)
> > >  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c
> > >  create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h
> > >  create mode 100644 lib/librte_power/rte_power_common.h
> > >  create mode 100644 lib/librte_power/rte_power_kvm_vm.c
> > >  create mode 100644 lib/librte_power/rte_power_kvm_vm.h
> > >
> > > diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
> > > index 856da9a..998ed1c 100644
> > > --- a/lib/librte_power/rte_power.c
> > > +++ b/lib/librte_power/rte_power.c
> > > @@ -31,515 +31,113 @@
> > >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> > >   */
> > >
> > > -#include <stdio.h>
> > > -#include <sys/types.h>
> > > -#include <sys/stat.h>
> > > -#include <fcntl.h>
> > > -#include <stdlib.h>
> > > -#include <string.h>
> > > -#include <unistd.h>
> > > -#include <signal.h>
> > > -#include <limits.h>
> > > -
> > > -#include <rte_memcpy.h>
> > >  #include <rte_atomic.h>
> > >
> > >  #include "rte_power.h"
> > > +#include "rte_power_acpi_cpufreq.h"
> > > +#include "rte_power_kvm_vm.h"
> > > +#include "rte_power_common.h"
> > >
> > > -#ifdef RTE_LIBRTE_POWER_DEBUG
> > > -#define POWER_DEBUG_TRACE(fmt, args...) do { \
> > > -		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
> > > -	} while (0)
> > > -#else
> > > -#define POWER_DEBUG_TRACE(fmt, args...)
> > > -#endif
> > > -
> > > -#define FOPEN_OR_ERR_RET(f, retval) do { \
> > > -	if ((f) == NULL) { \
> > > -		RTE_LOG(ERR, POWER, "File not openned\n"); \
> > > -		return (retval); \
> > > -	} \
> > > -} while(0)
> > > -
> > > -#define FOPS_OR_NULL_GOTO(ret, label) do { \
> > > -	if ((ret) == NULL) { \
> > > -		RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
> > > -		goto label; \
> > > -	} \
> > > -} while(0)
> > > -
> > > -#define FOPS_OR_ERR_GOTO(ret, label) do { \
> > > -	if ((ret) < 0) { \
> > > -		RTE_LOG(ERR, POWER, "File operations failed\n"); \
> > > -		goto label; \
> > > -	} \
> > > -} while(0)
> > > -
> > > -#define STR_SIZE     1024
> > > -#define POWER_CONVERT_TO_DECIMAL 10
> > > +enum power_management_env global_default_env = PM_ENV_NOT_SET;
> > >
> > > -#define POWER_GOVERNOR_USERSPACE "userspace"
> > > -#define POWER_SYSFILE_GOVERNOR   \
> > > -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
> > > -#define POWER_SYSFILE_AVAIL_FREQ \
> > > -
> > 	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencie
> > s"
> > > -#define POWER_SYSFILE_SETSPEED   \
> > > -	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
> > > +volatile uint32_t global_env_cfg_status = 0;
> > >
> > > -enum power_state {
> > > -	POWER_IDLE = 0,
> > > -	POWER_ONGOING,
> > > -	POWER_USED,
> > > -	POWER_UNKNOWN
> > > -};
> > > +/* function pointers */
> > > +rte_power_freqs_t rte_power_freqs  = NULL;
> > > +rte_power_get_freq_t rte_power_get_freq = NULL;
> > > +rte_power_set_freq_t rte_power_set_freq = NULL;
> > > +rte_power_freq_change_t rte_power_freq_up = NULL;
> > > +rte_power_freq_change_t rte_power_freq_down = NULL;
> > > +rte_power_freq_change_t rte_power_freq_max = NULL;
> > > +rte_power_freq_change_t rte_power_freq_min = NULL;
> > >
> > > -/**
> > > - * Power info per lcore.
> > > - */
> > > -struct rte_power_info {
> > > -	unsigned lcore_id;                   /**< Logical core id */
> > > -	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
> > > -	uint32_t nb_freqs;                   /**< number of available freqs */
> > > -	FILE *f;                             /**< FD of scaling_setspeed */
> > > -	char governor_ori[32];               /**< Original governor name */
> > > -	uint32_t curr_idx;                   /**< Freq index in freqs array */
> > > -	volatile uint32_t state;             /**< Power in use state */
> > > -} __rte_cache_aligned;
> > > -
> > > -static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
> > > -
> > > -/**
> > > - * It is to set specific freq for specific logical core, according to the index
> > > - * of supported frequencies.
> > > - */
> > > -static int
> > > -set_freq_internal(struct rte_power_info *pi, uint32_t idx)
> > > +int
> > > +rte_power_set_env(enum power_management_env env)
> > >  {
> > > -	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
> > > -		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
> > > -			"should be less than %u\n", idx, pi->nb_freqs);
> > > -		return -1;
> > > -	}
> > > -
> > > -	/* Check if it is the same as current */
> > > -	if (idx == pi->curr_idx)
> > > +	if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
> > >  		return 0;
> > > -
> > 1 Nit here.  If an invalid environment value is passed in on the first config
> > attempt here, you won't ever be able to set it.  Maybe add some logic to return
> > us to an initial state if a value env isn't selected?
> > 
> > Neil
> 
> Hi Neil,
> 
> I should have called it out in the commit, but there's also a rte_power_unset_env()
> function that resets the environment that allows for retrying a different environment.
> rte_power_unset_env() is also called when an invalid configuration is set.
> 
> Thanks,
> Alan.
> 
Ok, that seems like an odd interface to me, but it works as well as anything
else.

Thanks!
Neil

>
  

Patch

diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
index 856da9a..998ed1c 100644
--- a/lib/librte_power/rte_power.c
+++ b/lib/librte_power/rte_power.c
@@ -31,515 +31,113 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <signal.h>
-#include <limits.h>
-
-#include <rte_memcpy.h>
 #include <rte_atomic.h>
 
 #include "rte_power.h"
+#include "rte_power_acpi_cpufreq.h"
+#include "rte_power_kvm_vm.h"
+#include "rte_power_common.h"
 
-#ifdef RTE_LIBRTE_POWER_DEBUG
-#define POWER_DEBUG_TRACE(fmt, args...) do { \
-		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
-	} while (0)
-#else
-#define POWER_DEBUG_TRACE(fmt, args...)
-#endif
-
-#define FOPEN_OR_ERR_RET(f, retval) do { \
-	if ((f) == NULL) { \
-		RTE_LOG(ERR, POWER, "File not openned\n"); \
-		return (retval); \
-	} \
-} while(0)
-
-#define FOPS_OR_NULL_GOTO(ret, label) do { \
-	if ((ret) == NULL) { \
-		RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
-		goto label; \
-	} \
-} while(0)
-
-#define FOPS_OR_ERR_GOTO(ret, label) do { \
-	if ((ret) < 0) { \
-		RTE_LOG(ERR, POWER, "File operations failed\n"); \
-		goto label; \
-	} \
-} while(0)
-
-#define STR_SIZE     1024
-#define POWER_CONVERT_TO_DECIMAL 10
+enum power_management_env global_default_env = PM_ENV_NOT_SET;
 
-#define POWER_GOVERNOR_USERSPACE "userspace"
-#define POWER_SYSFILE_GOVERNOR   \
-	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
-#define POWER_SYSFILE_AVAIL_FREQ \
-	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
-#define POWER_SYSFILE_SETSPEED   \
-	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
+volatile uint32_t global_env_cfg_status = 0;
 
-enum power_state {
-	POWER_IDLE = 0,
-	POWER_ONGOING,
-	POWER_USED,
-	POWER_UNKNOWN
-};
+/* function pointers */
+rte_power_freqs_t rte_power_freqs  = NULL;
+rte_power_get_freq_t rte_power_get_freq = NULL;
+rte_power_set_freq_t rte_power_set_freq = NULL;
+rte_power_freq_change_t rte_power_freq_up = NULL;
+rte_power_freq_change_t rte_power_freq_down = NULL;
+rte_power_freq_change_t rte_power_freq_max = NULL;
+rte_power_freq_change_t rte_power_freq_min = NULL;
 
-/**
- * Power info per lcore.
- */
-struct rte_power_info {
-	unsigned lcore_id;                   /**< Logical core id */
-	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
-	uint32_t nb_freqs;                   /**< number of available freqs */
-	FILE *f;                             /**< FD of scaling_setspeed */
-	char governor_ori[32];               /**< Original governor name */
-	uint32_t curr_idx;                   /**< Freq index in freqs array */
-	volatile uint32_t state;             /**< Power in use state */
-} __rte_cache_aligned;
-
-static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
-
-/**
- * It is to set specific freq for specific logical core, according to the index
- * of supported frequencies.
- */
-static int
-set_freq_internal(struct rte_power_info *pi, uint32_t idx)
+int
+rte_power_set_env(enum power_management_env env)
 {
-	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
-		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
-			"should be less than %u\n", idx, pi->nb_freqs);
-		return -1;
-	}
-
-	/* Check if it is the same as current */
-	if (idx == pi->curr_idx)
+	if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
 		return 0;
-
-	POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
-				idx, pi->freqs[idx], pi->lcore_id);
-	if (fseek(pi->f, 0, SEEK_SET) < 0) {
-		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
-			"for setting frequency for lcore %u\n", pi->lcore_id);
-		return -1;
 	}
-	if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
-		RTE_LOG(ERR, POWER, "Fail to write new frequency for "
-					"lcore %u\n", pi->lcore_id);
+	if (env == PM_ENV_ACPI_CPUFREQ) {
+		rte_power_freqs = rte_power_acpi_cpufreq_freqs;
+		rte_power_get_freq = rte_power_acpi_cpufreq_get_freq;
+		rte_power_set_freq = rte_power_acpi_cpufreq_set_freq;
+		rte_power_freq_up = rte_power_acpi_cpufreq_freq_up;
+		rte_power_freq_down = rte_power_acpi_cpufreq_freq_down;
+		rte_power_freq_min = rte_power_acpi_cpufreq_freq_min;
+		rte_power_freq_max = rte_power_acpi_cpufreq_freq_max;
+	} else if (env == PM_ENV_KVM_VM) {
+		rte_power_freqs = rte_power_kvm_vm_freqs;
+		rte_power_get_freq = rte_power_kvm_vm_get_freq;
+		rte_power_set_freq = rte_power_kvm_vm_set_freq;
+		rte_power_freq_up = rte_power_kvm_vm_freq_up;
+		rte_power_freq_down = rte_power_kvm_vm_freq_down;
+		rte_power_freq_min = rte_power_kvm_vm_freq_min;
+		rte_power_freq_max = rte_power_kvm_vm_freq_max;
+	} else {
+		RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
+				env);
+		rte_power_unset_env();
 		return -1;
 	}
-	fflush(pi->f);
-	pi->curr_idx = idx;
-
-	return 1;
-}
-
-/**
- * It is to check the current scaling governor by reading sys file, and then
- * set it into 'userspace' if it is not by writing the sys file. The original
- * governor will be saved for rolling back.
- */
-static int
-power_set_governor_userspace(struct rte_power_info *pi)
-{
-	FILE *f;
-	int ret = -1;
-	char buf[BUFSIZ];
-	char fullpath[PATH_MAX];
-	char *s;
-	int val;
-
-	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
-							pi->lcore_id);
-	f = fopen(fullpath, "rw+");
-	FOPEN_OR_ERR_RET(f, ret);
-
-	s = fgets(buf, sizeof(buf), f);
-	FOPS_OR_NULL_GOTO(s, out);
-
-	/* Check if current governor is userspace */
-	if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
-		sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
-		ret = 0;
-		POWER_DEBUG_TRACE("Power management governor of lcore %u is "
-					"already userspace\n", pi->lcore_id);
-		goto out;
-	}
-	/* Save the original governor */
-	snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
-
-	/* Write 'userspace' to the governor */
-	val = fseek(f, 0, SEEK_SET);
-	FOPS_OR_ERR_GOTO(val, out);
-
-	val = fputs(POWER_GOVERNOR_USERSPACE, f);
-	FOPS_OR_ERR_GOTO(val, out);
-
-	ret = 0;
-	RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
-			"set to user space successfully\n", pi->lcore_id);
-out:
-	fclose(f);
+	global_default_env = env;
+	return 0;
 
-	return ret;
 }
 
-/**
- * It is to get the available frequencies of the specific lcore by reading the
- * sys file.
- */
-static int
-power_get_available_freqs(struct rte_power_info *pi)
+void
+rte_power_unset_env(void)
 {
-	FILE *f;
-	int ret = -1, i, count;
-	char *p;
-	char buf[BUFSIZ];
-	char fullpath[PATH_MAX];
-	char *freqs[RTE_MAX_LCORE_FREQS];
-	char *s;
-
-	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
-								pi->lcore_id);
-	f = fopen(fullpath, "r");
-	FOPEN_OR_ERR_RET(f, ret);
-
-	s = fgets(buf, sizeof(buf), f);
-	FOPS_OR_NULL_GOTO(s, out);
-
-	/* Strip the line break if there is */
-	p = strchr(buf, '\n');
-	if (p != NULL)
-		*p = 0;
-
-	/* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
-	count = rte_strsplit(buf, sizeof(buf), freqs,
-				RTE_MAX_LCORE_FREQS, ' ');
-	if (count <= 0) {
-		RTE_LOG(ERR, POWER, "No available frequency in "
-			""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
-		goto out;
-	}
-	if (count >= RTE_MAX_LCORE_FREQS) {
-		RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
-								count);
-		goto out;
-	}
-
-	/* Store the available frequncies into power context */
-	for (i = 0, pi->nb_freqs = 0; i < count; i++) {
-		POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
-								i, freqs[i]);
-		pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
-					POWER_CONVERT_TO_DECIMAL);
-	}
-
-	ret = 0;
-	POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
-						count, pi->lcore_id);
-out:
-	fclose(f);
-
-	return ret;
+	if (rte_atomic32_cmpset(&global_env_cfg_status, 1, 0) != 0)
+		global_default_env = PM_ENV_NOT_SET;
 }
 
-/**
- * It is to fopen the sys file for the future setting the lcore frequency.
- */
-static int
-power_init_for_setting_freq(struct rte_power_info *pi)
-{
-	FILE *f;
-	char fullpath[PATH_MAX];
-	char buf[BUFSIZ];
-	uint32_t i, freq;
-	char *s;
-
-	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
-							pi->lcore_id);
-	f = fopen(fullpath, "rw+");
-	FOPEN_OR_ERR_RET(f, -1);
-
-	s = fgets(buf, sizeof(buf), f);
-	FOPS_OR_NULL_GOTO(s, out);
-
-	freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
-	for (i = 0; i < pi->nb_freqs; i++) {
-		if (freq == pi->freqs[i]) {
-			pi->curr_idx = i;
-			pi->f = f;
-			return 0;
-		}
-	}
-
-out:
-	fclose(f);
-
-	return -1;
+enum power_management_env
+rte_power_get_env(void) {
+	return global_default_env;
 }
 
 int
 rte_power_init(unsigned lcore_id)
 {
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
-					lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-
-	pi = &lcore_power_info[lcore_id];
-	if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
-								== 0) {
-		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
-						"in use\n", lcore_id);
-		return -1;
-	}
-
-	pi->lcore_id = lcore_id;
-	/* Check and set the governor */
-	if (power_set_governor_userspace(pi) < 0) {
-		RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
-						"userspace\n", lcore_id);
-		goto fail;
-	}
+	int ret = -1;
 
-	/* Get the available frequencies */
-	if (power_get_available_freqs(pi) < 0) {
-		RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
-						"lcore %u\n", lcore_id);
-		goto fail;
+	if (global_default_env == PM_ENV_ACPI_CPUFREQ) {
+		return rte_power_acpi_cpufreq_init(lcore_id);
 	}
-
-	/* Init for setting lcore frequency */
-	if (power_init_for_setting_freq(pi) < 0) {
-		RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
-						"lcore %u\n", lcore_id);
-		goto fail;
+	if (global_default_env == PM_ENV_KVM_VM) {
+		return rte_power_kvm_vm_init(lcore_id);
 	}
-
-	/* Set freq to max by default */
-	if (rte_power_freq_max(lcore_id) < 0) {
-		RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
-						"to max\n", lcore_id);
-		goto fail;
+	/* Auto detect Environment */
+	RTE_LOG(INFO, POWER, "Attempting to initialise ACPI cpufreq power "
+			"management...\n");
+	ret = rte_power_acpi_cpufreq_init(lcore_id);
+	if (ret == 0) {
+		rte_power_set_env(PM_ENV_ACPI_CPUFREQ);
+		goto out;
 	}
 
-	RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
-					"power manamgement\n", lcore_id);
-	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
-
-	return 0;
-
-fail:
-	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
-
-	return -1;
-}
-
-/**
- * It is to check the governor and then set the original governor back if
- * needed by writing the the sys file.
- */
-static int
-power_set_governor_original(struct rte_power_info *pi)
-{
-	FILE *f;
-	int ret = -1;
-	char buf[BUFSIZ];
-	char fullpath[PATH_MAX];
-	char *s;
-	int val;
-
-	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
-							pi->lcore_id);
-	f = fopen(fullpath, "rw+");
-	FOPEN_OR_ERR_RET(f, ret);
-
-	s = fgets(buf, sizeof(buf), f);
-	FOPS_OR_NULL_GOTO(s, out);
-
-	/* Check if the governor to be set is the same as current */
-	if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
-		ret = 0;
-		POWER_DEBUG_TRACE("Power management governor of lcore %u "
-					"has already been set to %s\n",
-					pi->lcore_id, pi->governor_ori);
+	RTE_LOG(INFO, POWER, "Attempting to initialise VM power management...\n");
+	ret = rte_power_kvm_vm_init(lcore_id);
+	if (ret == 0) {
+		rte_power_set_env(PM_ENV_KVM_VM);
 		goto out;
 	}
-
-	/* Write back the original governor */
-	val = fseek(f, 0, SEEK_SET);
-	FOPS_OR_ERR_GOTO(val, out);
-
-	val = fputs(pi->governor_ori, f);
-	FOPS_OR_ERR_GOTO(val, out);
-
-	ret = 0;
-	RTE_LOG(INFO, POWER, "Power manamgement governor of lcore %u "
-				"has been set back to %s successfully\n",
-					pi->lcore_id, pi->governor_ori);
+	RTE_LOG(ERR, POWER, "Unable to set Power Management Environment for lcore "
+			"%u\n", lcore_id);
 out:
-	fclose(f);
-
 	return ret;
 }
 
 int
 rte_power_exit(unsigned lcore_id)
 {
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
-					lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-	pi = &lcore_power_info[lcore_id];
-	if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
-								== 0) {
-		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
-						"not used\n", lcore_id);
-		return -1;
-	}
-
-	/* Close FD of setting freq */
-	fclose(pi->f);
-	pi->f = NULL;
-
-	/* Set the governor back to the original */
-	if (power_set_governor_original(pi) < 0) {
-		RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
-					"to the original\n", lcore_id);
-		goto fail;
-	}
-
-	RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
-				"'userspace' mode and been set back to the "
-						"original\n", lcore_id);
-	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
-
-	return 0;
-
-fail:
-	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+	if (global_default_env == PM_ENV_ACPI_CPUFREQ)
+		return rte_power_acpi_cpufreq_exit(lcore_id);
+	if (global_default_env == PM_ENV_KVM_VM)
+		return rte_power_kvm_vm_exit(lcore_id);
 
+	RTE_LOG(ERR, POWER, "Environment has not been set, unable to exit "
+				"gracefully\n");
 	return -1;
-}
-
-uint32_t
-rte_power_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
-{
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE || !freqs) {
-		RTE_LOG(ERR, POWER, "Invalid input parameter\n");
-		return 0;
-	}
-
-	pi = &lcore_power_info[lcore_id];
-	if (num < pi->nb_freqs) {
-		RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
-		return 0;
-	}
-	rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
-
-	return pi->nb_freqs;
-}
-
-uint32_t
-rte_power_get_freq(unsigned lcore_id)
-{
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return RTE_POWER_INVALID_FREQ_INDEX;
-	}
-
-	return lcore_power_info[lcore_id].curr_idx;
-}
-
-int
-rte_power_set_freq(unsigned lcore_id, uint32_t index)
-{
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return -1;
-	}
-
-	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
-}
-
-int
-rte_power_freq_down(unsigned lcore_id)
-{
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return -1;
-	}
 
-	pi = &lcore_power_info[lcore_id];
-	if (pi->curr_idx + 1 == pi->nb_freqs)
-		return 0;
-
-	/* Frequencies in the array are from high to low. */
-	return set_freq_internal(pi, pi->curr_idx + 1);
 }
-
-int
-rte_power_freq_up(unsigned lcore_id)
-{
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return -1;
-	}
-
-	pi = &lcore_power_info[lcore_id];
-	if (pi->curr_idx == 0)
-		return 0;
-
-	/* Frequencies in the array are from high to low. */
-	return set_freq_internal(pi, pi->curr_idx - 1);
-}
-
-int
-rte_power_freq_max(unsigned lcore_id)
-{
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return -1;
-	}
-
-	/* Frequencies in the array are from high to low. */
-	return set_freq_internal(&lcore_power_info[lcore_id], 0);
-}
-
-int
-rte_power_freq_min(unsigned lcore_id)
-{
-	struct rte_power_info *pi;
-
-	if (lcore_id >= RTE_MAX_LCORE) {
-		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
-		return -1;
-	}
-
-	pi = &lcore_power_info[lcore_id];
-
-	/* Frequencies in the array are from high to low. */
-	return set_freq_internal(pi, pi->nb_freqs - 1);
-}
-
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
index 9c1419e..9338069 100644
--- a/lib/librte_power/rte_power.h
+++ b/lib/librte_power/rte_power.h
@@ -48,12 +48,48 @@ 
 extern "C" {
 #endif
 
-#define RTE_POWER_INVALID_FREQ_INDEX (~0)
+/* Power Management Environment State */
+enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM};
 
 /**
- * Initialize power management for a specific lcore. It will check and set the
- * governor to userspace for the lcore, get the available frequencies, and
- * prepare to set new lcore frequency.
+ * Set the default power management implementation. If this is not called prior
+ * to rte_power_init(), then auto-detect of the environment will take place.
+ * It is not thread safe.
+ *
+ * @param env
+ *  The environment in which to initialise Power Management.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_set_env(enum power_management_env env);
+
+/**
+ * Unset the global environment configuration.
+ * This can only be called after all threads have completed.
+ *
+ * @param None.
+ *
+ * @return
+ *  None.
+ */
+void rte_power_unset_env(void);
+
+/**
+ * Get the default power management implementation.
+ *
+ * @param None.
+ *
+ * @return
+ *  power_management_env The configured environment.
+ */
+enum power_management_env rte_power_get_env(void);
+
+/**
+ * Initialize power management for a specific lcore. If rte_power_set_env() has
+ * not been called then an auto-detect of the environment will start and
+ * initialise the corresponding resources.
  *
  * @param lcore_id
  *  lcore id.
@@ -65,8 +101,9 @@  extern "C" {
 int rte_power_init(unsigned lcore_id);
 
 /**
- * Exit power management on a specific lcore. It will set the governor to which
- * is before initialized.
+ * Exit power management on a specific lcore. This will call the environment
+ * dependent exit function.
+ *
  *
  * @param lcore_id
  *  lcore id.
@@ -78,11 +115,9 @@  int rte_power_init(unsigned lcore_id);
 int rte_power_exit(unsigned lcore_id);
 
 /**
- * Get the available frequencies of a specific lcore. The return value will be
- * the minimal one of the total number of available frequencies and the number
- * of buffer. The index of available frequencies used in other interfaces
- * should be in the range of 0 to this return value.
- * It should be protected outside of this function for threadsafe.
+ * Get the available frequencies of a specific lcore.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
@@ -94,12 +129,15 @@  int rte_power_exit(unsigned lcore_id);
  * @return
  *  The number of available frequencies.
  */
-uint32_t rte_power_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num);
+typedef uint32_t (*rte_power_freqs_t)(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+extern rte_power_freqs_t rte_power_freqs;
 
 /**
- * Return the current index of available frequencies of a specific lcore. It
- * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error.
- * It should be protected outside of this function for threadsafe.
+ * Return the current index of available frequencies of a specific lcore.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
@@ -107,12 +145,15 @@  uint32_t rte_power_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num);
  * @return
  *  The current index of available frequencies.
  */
-uint32_t rte_power_get_freq(unsigned lcore_id);
+typedef uint32_t (*rte_power_get_freq_t)(unsigned lcore_id);
+
+extern rte_power_get_freq_t rte_power_get_freq;
 
 /**
  * Set the new frequency for a specific lcore by indicating the index of
  * available frequencies.
- * It should be protected outside of this function for threadsafe.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
@@ -121,70 +162,87 @@  uint32_t rte_power_get_freq(unsigned lcore_id);
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
-int rte_power_set_freq(unsigned lcore_id, uint32_t index);
+typedef int (*rte_power_set_freq_t)(unsigned lcore_id, uint32_t index);
+
+extern rte_power_set_freq_t rte_power_set_freq;
+
+/**
+ * Function pointer definition for generic frequency change functions. Review
+ * each environments specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+typedef int (*rte_power_freq_change_t)(unsigned lcore_id);
 
 /**
  * Scale up the frequency of a specific lcore according to the available
  * frequencies.
- * It should be protected outside of this function for threadsafe.
+ * Review each environments specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
-int rte_power_freq_up(unsigned lcore_id);
+extern rte_power_freq_change_t rte_power_freq_up;
 
 /**
  * Scale down the frequency of a specific lcore according to the available
  * frequencies.
- * It should be protected outside of this function for threadsafe.
+ * Review each environments specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
-int rte_power_freq_down(unsigned lcore_id);
+
+extern rte_power_freq_change_t rte_power_freq_down;
 
 /**
  * Scale up the frequency of a specific lcore to the highest according to the
  * available frequencies.
- * It should be protected outside of this function for threadsafe.
+ * Review each environments specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
-int rte_power_freq_max(unsigned lcore_id);
+extern rte_power_freq_change_t rte_power_freq_max;
 
 /**
  * Scale down the frequency of a specific lcore to the lowest according to the
  * available frequencies.
- * It should be protected outside of this function for threadsafe.
+ * Review each environments specific documentation for usage.
  *
  * @param lcore_id
  *  lcore id.
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
-int rte_power_freq_min(unsigned lcore_id);
+extern rte_power_freq_change_t rte_power_freq_min;
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c b/lib/librte_power/rte_power_acpi_cpufreq.c
new file mode 100644
index 0000000..09085c3
--- /dev/null
+++ b/lib/librte_power/rte_power_acpi_cpufreq.c
@@ -0,0 +1,545 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <limits.h>
+
+#include <rte_memcpy.h>
+#include <rte_atomic.h>
+
+#include "rte_power_acpi_cpufreq.h"
+#include "rte_power_common.h"
+
+/*
+ * Debug trace helper. When RTE_LIBRTE_POWER_DEBUG is defined it logs through
+ * RTE_LOG with the name of the calling function prepended; otherwise it
+ * expands to nothing.
+ */
+#ifdef RTE_LIBRTE_POWER_DEBUG
+#define POWER_DEBUG_TRACE(fmt, args...) do { \
+		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
+} while (0)
+#else
+#define POWER_DEBUG_TRACE(fmt, args...)
+#endif
+
+/*
+ * Log an error and RETURN retval from the enclosing function when the
+ * FILE pointer f is NULL. Note the hidden control flow.
+ */
+#define FOPEN_OR_ERR_RET(f, retval) do { \
+		if ((f) == NULL) { \
+			RTE_LOG(ERR, POWER, "File not openned\n"); \
+			return retval; \
+		} \
+} while (0)
+
+/*
+ * Log an error and GOTO label when ret (the pointer returned by fgets) is
+ * NULL. The enclosing function must define the label.
+ */
+#define FOPS_OR_NULL_GOTO(ret, label) do { \
+		if ((ret) == NULL) { \
+			RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
+			goto label; \
+		} \
+} while (0)
+
+/*
+ * Log an error and GOTO label when ret (the integer result of a stdio call)
+ * is negative. The enclosing function must define the label.
+ */
+#define FOPS_OR_ERR_GOTO(ret, label) do { \
+		if ((ret) < 0) { \
+			RTE_LOG(ERR, POWER, "File operations failed\n"); \
+			goto label; \
+		} \
+} while (0)
+
+#define STR_SIZE     1024
+/* Numeric base handed to strtoul() when parsing frequency values */
+#define POWER_CONVERT_TO_DECIMAL 10
+
+/* Governor name written to scaling_governor during initialisation */
+#define POWER_GOVERNOR_USERSPACE "userspace"
+/* Per-CPU sysfs paths; %u is replaced with the lcore id */
+#define POWER_SYSFILE_GOVERNOR   \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
+#define POWER_SYSFILE_AVAIL_FREQ \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
+#define POWER_SYSFILE_SETSPEED   \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
+
+/*
+ * Per-lcore life-cycle state, moved with rte_atomic32_cmpset():
+ *   init: POWER_IDLE -> POWER_ONGOING -> POWER_USED (or POWER_UNKNOWN on error)
+ *   exit: POWER_USED -> POWER_ONGOING -> POWER_IDLE (or POWER_UNKNOWN on error)
+ * No function in this file moves an lcore out of POWER_UNKNOWN.
+ */
+enum power_state {
+	POWER_IDLE = 0,
+	POWER_ONGOING,
+	POWER_USED,
+	POWER_UNKNOWN
+};
+
+/**
+ * Power info per lcore.
+ */
+struct rte_power_info {
+	unsigned lcore_id;                   /**< Logical core id */
+	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
+	uint32_t nb_freqs;                   /**< number of available freqs */
+	FILE *f;                             /**< FD of scaling_setspeed */
+	char governor_ori[32];               /**< Original governor name */
+	uint32_t curr_idx;                   /**< Freq index in freqs array */
+	volatile uint32_t state;             /**< Power in use state */
+} __rte_cache_aligned;
+
+/* One context per possible lcore, indexed by lcore id */
+static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
+
+/**
+ * Set the frequency of a logical core to the entry at index idx of its
+ * available-frequency table, by writing that value to the already opened
+ * scaling_setspeed stream.
+ *
+ * @param pi
+ *  Per-lcore power context; pi->f must be the stream opened by
+ *  power_init_for_setting_freq().
+ * @param idx
+ *  Index into pi->freqs; must be below both RTE_MAX_LCORE_FREQS and
+ *  pi->nb_freqs.
+ *
+ * @return
+ *  - 1 if the frequency was changed.
+ *  - 0 if idx is already the current index (nothing is written).
+ *  - -1 on an invalid index or an I/O failure.
+ */
+static int
+set_freq_internal(struct rte_power_info *pi, uint32_t idx)
+{
+	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
+		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
+				"should be less than %u\n", idx, pi->nb_freqs);
+		return -1;
+	}
+
+	/* Check if it is the same as current */
+	if (idx == pi->curr_idx)
+		return 0;
+
+	POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
+			idx, pi->freqs[idx], pi->lcore_id);
+	if (fseek(pi->f, 0, SEEK_SET) < 0) {
+		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
+				"for setting frequency for lcore %u\n", pi->lcore_id);
+		return -1;
+	}
+	if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
+		RTE_LOG(ERR, POWER, "Fail to write new frequency for "
+				"lcore %u\n", pi->lcore_id);
+		return -1;
+	}
+	/*
+	 * stdio buffers the write, so the value only reaches the kernel on
+	 * flush. A rejected write is reported here, and the cached index must
+	 * not be updated in that case.
+	 */
+	if (fflush(pi->f) != 0) {
+		RTE_LOG(ERR, POWER, "Fail to write new frequency for "
+				"lcore %u\n", pi->lcore_id);
+		return -1;
+	}
+	pi->curr_idx = idx;
+
+	return 1;
+}
+
+/**
+ * Read the current scaling governor of the lcore from sysfs, remember it in
+ * pi->governor_ori for rolling back, and switch the governor to 'userspace'
+ * unless it already is.
+ *
+ * @param pi
+ *  Per-lcore power context; pi->lcore_id selects the sysfs file.
+ *
+ * @return
+ *  - 0 if the governor is (now) 'userspace'.
+ *  - -1 if the sysfs file cannot be opened, read or written.
+ */
+static int
+power_set_governor_userspace(struct rte_power_info *pi)
+{
+	FILE *f;
+	int ret = -1;
+	char buf[BUFSIZ];
+	char fullpath[PATH_MAX];
+	char *s;
+	int val;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
+			pi->lcore_id);
+	/* "r+" is the standard read/update mode; "rw+" is not a defined mode */
+	f = fopen(fullpath, "r+");
+	FOPEN_OR_ERR_RET(f, ret);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	/*
+	 * fgets() keeps the line break. Drop it, otherwise the name can never
+	 * compare equal to "userspace" and the saved name carries a newline.
+	 */
+	buf[strcspn(buf, "\n")] = '\0';
+
+	/*
+	 * Save the governor found now, even when it is already 'userspace',
+	 * so that a name left over from an earlier init is never restored.
+	 */
+	snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
+
+	/* Check if current governor is userspace */
+	if (strcmp(buf, POWER_GOVERNOR_USERSPACE) == 0) {
+		ret = 0;
+		POWER_DEBUG_TRACE("Power management governor of lcore %u is "
+				"already userspace\n", pi->lcore_id);
+		goto out;
+	}
+
+	/* Write 'userspace' to the governor */
+	val = fseek(f, 0, SEEK_SET);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	val = fputs(POWER_GOVERNOR_USERSPACE, f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	/* The write is buffered; flush so that a rejected write is detected */
+	val = fflush(f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	ret = 0;
+	RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
+			"set to user space successfully\n", pi->lcore_id);
+out:
+	fclose(f);
+
+	return ret;
+}
+
+/**
+ * Read scaling_available_frequencies of the lcore from sysfs and store the
+ * parsed values in pi->freqs, with their number in pi->nb_freqs.
+ *
+ * @param pi
+ *  Per-lcore power context; pi->lcore_id selects the sysfs file.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 if the file cannot be opened or read, if it lists no frequency, or
+ *    if it lists RTE_MAX_LCORE_FREQS or more of them.
+ */
+static int
+power_get_available_freqs(struct rte_power_info *pi)
+{
+	char path[PATH_MAX];
+	char line[BUFSIZ];
+	char *tokens[RTE_MAX_LCORE_FREQS];
+	char *eol;
+	char *endp;
+	FILE *fp;
+	int ntok;
+	int idx;
+	int rc = -1;
+
+	snprintf(path, sizeof(path), POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id);
+	fp = fopen(path, "r");
+	FOPEN_OR_ERR_RET(fp, rc);
+
+	FOPS_OR_NULL_GOTO(fgets(line, sizeof(line), fp), done);
+
+	/* Cut the line at its line break, if it has one */
+	eol = strchr(line, '\n');
+	if (eol != NULL)
+		*eol = 0;
+
+	/* At most RTE_MAX_LCORE_FREQS space-separated tokens are produced */
+	ntok = rte_strsplit(line, sizeof(line), tokens,
+			RTE_MAX_LCORE_FREQS, ' ');
+	if (ntok <= 0) {
+		RTE_LOG(ERR, POWER, "No available frequency in "
+				POWER_SYSFILE_AVAIL_FREQ "\n", pi->lcore_id);
+		goto done;
+	}
+	if (ntok >= RTE_MAX_LCORE_FREQS) {
+		RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
+				ntok);
+		goto done;
+	}
+
+	/* Convert every token and record it in the power context */
+	pi->nb_freqs = 0;
+	for (idx = 0; idx < ntok; idx++) {
+		POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
+				idx, tokens[idx]);
+		pi->freqs[pi->nb_freqs] = strtoul(tokens[idx], &endp,
+				POWER_CONVERT_TO_DECIMAL);
+		pi->nb_freqs++;
+	}
+
+	rc = 0;
+	POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
+			ntok, pi->lcore_id);
+done:
+	fclose(fp);
+
+	return rc;
+}
+
+/**
+ * Open scaling_setspeed of the lcore for later frequency changes and work out
+ * which entry of pi->freqs is the frequency currently set.
+ *
+ * On success the stream is left open in pi->f and pi->curr_idx holds the
+ * matching index. The stream is closed again when the file cannot be read or
+ * when the current frequency is not one of pi->freqs.
+ *
+ * @param pi
+ *  Per-lcore power context; pi->freqs and pi->nb_freqs must be filled in.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on error.
+ */
+static int
+power_init_for_setting_freq(struct rte_power_info *pi)
+{
+	FILE *f;
+	char fullpath[PATH_MAX];
+	char buf[BUFSIZ];
+	uint32_t i, freq;
+	char *s;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
+			pi->lcore_id);
+	/* "r+" is the standard read/update mode; "rw+" is not a defined mode */
+	f = fopen(fullpath, "r+");
+	FOPEN_OR_ERR_RET(f, -1);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
+	for (i = 0; i < pi->nb_freqs; i++) {
+		if (freq == pi->freqs[i]) {
+			pi->curr_idx = i;
+			/* Ownership of the open stream passes to pi */
+			pi->f = f;
+			return 0;
+		}
+	}
+
+out:
+	fclose(f);
+
+	return -1;
+}
+
+/**
+ * Initialise ACPI cpufreq power management for one lcore: switch its governor
+ * to 'userspace', read its available frequencies, open scaling_setspeed and
+ * set the frequency to the maximum.
+ *
+ * The lcore must be in the POWER_IDLE state. It ends in POWER_USED on success
+ * and in POWER_UNKNOWN when any step fails.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on error.
+ */
+int
+rte_power_acpi_cpufreq_init(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
+				lcore_id, RTE_MAX_LCORE - 1U);
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
+			== 0) {
+		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
+				"in use\n", lcore_id);
+		return -1;
+	}
+
+	pi->lcore_id = lcore_id;
+	/* Check and set the governor */
+	if (power_set_governor_userspace(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
+				"userspace\n", lcore_id);
+		goto fail;
+	}
+
+	/* Get the available frequencies */
+	if (power_get_available_freqs(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
+				"lcore %u\n", lcore_id);
+		goto fail;
+	}
+
+	/* Init for setting lcore frequency */
+	if (power_init_for_setting_freq(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
+				"lcore %u\n", lcore_id);
+		goto fail;
+	}
+
+	/* Set freq to max by default */
+	if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
+				"to max\n", lcore_id);
+		/*
+		 * The setspeed stream is open at this point, and the lcore is
+		 * about to enter POWER_UNKNOWN, a state in which
+		 * rte_power_acpi_cpufreq_exit() refuses to run. Close the
+		 * stream here or it is leaked.
+		 */
+		fclose(pi->f);
+		pi->f = NULL;
+		goto fail;
+	}
+
+	RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
+			"power manamgement\n", lcore_id);
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
+
+	return 0;
+
+fail:
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+
+	return -1;
+}
+
+/**
+ * Read the current scaling governor of the lcore and, if it differs from the
+ * one saved in pi->governor_ori, write the saved governor back to the sys
+ * file.
+ *
+ * @param pi
+ *  Per-lcore power context; pi->governor_ori holds the name to restore.
+ *
+ * @return
+ *  - 0 if the governor already matches or was written back.
+ *  - -1 if the sysfs file cannot be opened, read or written.
+ */
+static int
+power_set_governor_original(struct rte_power_info *pi)
+{
+	FILE *f;
+	int ret = -1;
+	char buf[BUFSIZ];
+	char fullpath[PATH_MAX];
+	char *s;
+	int val;
+	size_t ori_len;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
+			pi->lcore_id);
+	/* "r+" is the standard read/update mode; "rw+" is not a defined mode */
+	f = fopen(fullpath, "r+");
+	FOPEN_OR_ERR_RET(f, ret);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	/*
+	 * Compare the names without their line breaks, so the result does not
+	 * depend on whether the saved name was stored with one.
+	 */
+	buf[strcspn(buf, "\n")] = '\0';
+	ori_len = strcspn(pi->governor_ori, "\n");
+
+	/* Check if the governor to be set is the same as current */
+	if (strlen(buf) == ori_len &&
+			strncmp(buf, pi->governor_ori, ori_len) == 0) {
+		ret = 0;
+		POWER_DEBUG_TRACE("Power management governor of lcore %u "
+				"has already been set to %s\n",
+				pi->lcore_id, pi->governor_ori);
+		goto out;
+	}
+
+	/* Write back the original governor */
+	val = fseek(f, 0, SEEK_SET);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	val = fputs(pi->governor_ori, f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	/* The write is buffered; flush so that a rejected write is detected */
+	val = fflush(f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	ret = 0;
+	RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
+			"has been set back to %s successfully\n",
+			pi->lcore_id, pi->governor_ori);
+out:
+	fclose(f);
+
+	return ret;
+}
+
+/**
+ * Leave ACPI cpufreq power management on one lcore: close the setspeed stream
+ * and put the saved governor back.
+ *
+ * The lcore must be in the POWER_USED state. It ends in POWER_IDLE on success
+ * and in POWER_UNKNOWN when the governor cannot be restored.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on error.
+ */
+int
+rte_power_acpi_cpufreq_exit(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
+				lcore_id, RTE_MAX_LCORE - 1U);
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (!rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)) {
+		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
+				"not used\n", lcore_id);
+		return -1;
+	}
+
+	/* The setspeed stream is no longer needed */
+	fclose(pi->f);
+	pi->f = NULL;
+
+	/* Restoring the governor decides the final state */
+	if (power_set_governor_original(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
+				"to the original\n", lcore_id);
+		rte_atomic32_cmpset(&(pi->state), POWER_ONGOING,
+				POWER_UNKNOWN);
+		return -1;
+	}
+
+	RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
+			"'userspace' mode and been set back to the "
+			"original\n", lcore_id);
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
+
+	return 0;
+}
+
+/**
+ * Copy the available frequencies of an lcore into a caller buffer.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ * @param freqs
+ *  Destination buffer, must not be NULL.
+ * @param num
+ *  Capacity of freqs in entries; must be at least the number of available
+ *  frequencies.
+ *
+ * @return
+ *  The number of frequencies copied, or 0 on invalid input or when the
+ *  buffer is too small.
+ */
+uint32_t
+rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
+{
+	const struct rte_power_info *pi;
+
+	if (freqs == NULL || lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid input parameter\n");
+		return 0;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (pi->nb_freqs > num) {
+		RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
+		return 0;
+	}
+
+	rte_memcpy(freqs, pi->freqs, sizeof(pi->freqs[0]) * pi->nb_freqs);
+
+	return pi->nb_freqs;
+}
+
+/**
+ * Return the index, within the available frequencies, of the frequency
+ * currently recorded for an lcore.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  The current frequency index, or RTE_POWER_INVALID_FREQ_INDEX when
+ *  lcore_id is out of range.
+ */
+uint32_t
+rte_power_acpi_cpufreq_get_freq(unsigned lcore_id)
+{
+	if (lcore_id < RTE_MAX_LCORE)
+		return lcore_power_info[lcore_id].curr_idx;
+
+	RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+	return RTE_POWER_INVALID_FREQ_INDEX;
+}
+
+/**
+ * Set the frequency of an lcore to the entry at the given index of its
+ * available frequencies.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ * @param index
+ *  Index into the available frequencies.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int
+rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	return set_freq_internal(pi, index);
+}
+
+/**
+ * Step an lcore down to the next lower available frequency.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 when the lcore is already at the last (lowest) entry.
+ *  - Negative on error.
+ */
+int
+rte_power_acpi_cpufreq_freq_down(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+	uint32_t next;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	/* The table runs from high to low, so "down" is the next index */
+	next = pi->curr_idx + 1;
+	if (next == pi->nb_freqs)
+		return 0;
+
+	return set_freq_internal(pi, next);
+}
+
+/**
+ * Step an lcore up to the next higher available frequency.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 when the lcore is already at the first (highest) entry.
+ *  - Negative on error.
+ */
+int
+rte_power_acpi_cpufreq_freq_up(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	/* The table runs from high to low, so "up" is the previous index */
+	if (pi->curr_idx > 0)
+		return set_freq_internal(pi, pi->curr_idx - 1);
+
+	return 0;
+}
+
+/**
+ * Set an lcore to the highest available frequency, which is the first entry
+ * of its frequency table.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int
+rte_power_acpi_cpufreq_freq_max(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	/* Index 0 holds the highest frequency */
+	return set_freq_internal(pi, 0);
+}
+
+/**
+ * Set an lcore to the lowest available frequency, which is the last entry of
+ * its frequency table.
+ *
+ * @param lcore_id
+ *  lcore id, must be below RTE_MAX_LCORE.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int
+rte_power_acpi_cpufreq_freq_min(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+	uint32_t last;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	/* The table runs from high to low, so the last index is the minimum */
+	last = pi->nb_freqs - 1;
+
+	return set_freq_internal(pi, last);
+}
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h b/lib/librte_power/rte_power_acpi_cpufreq.h
new file mode 100644
index 0000000..68578e9
--- /dev/null
+++ b/lib/librte_power/rte_power_acpi_cpufreq.h
@@ -0,0 +1,192 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_POWER_ACPI_CPUFREQ_H
+#define _RTE_POWER_ACPI_CPUFREQ_H
+
+/**
+ * @file
+ * RTE Power Management via userspace ACPI cpufreq
+ */
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize power management for a specific lcore. It will check and set the
+ * governor to userspace for the lcore, get the available frequencies, and
+ * prepare to set new lcore frequency.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_init(unsigned lcore_id);
+
+/**
+ * Exit power management on a specific lcore. It will set the governor to which
+ * is before initialized.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_exit(unsigned lcore_id);
+
+/**
+ * Get the available frequencies of a specific lcore. The buffer must be able
+ * to hold every available frequency; if it is too small nothing is copied and
+ * 0 is returned. The index of available frequencies used in other interfaces
+ * should be in the range of 0 to this return value minus one.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param freqs
+ *  The buffer array to save the frequencies.
+ * @param num
+ *  The number of frequencies to get.
+ *
+ * @return
+ *  The number of available frequencies.
+ */
+uint32_t rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+/**
+ * Return the current index of available frequencies of a specific lcore. It
+ * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  The current index of available frequencies.
+ */
+uint32_t rte_power_acpi_cpufreq_get_freq(unsigned lcore_id);
+
+/**
+ * Set the new frequency for a specific lcore by indicating the index of
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param index
+ *  The index of available frequencies.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index);
+
+/**
+ * Scale up the frequency of a specific lcore according to the available
+ * frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_up(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore according to the available
+ * frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_down(unsigned lcore_id);
+
+/**
+ * Scale up the frequency of a specific lcore to the highest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore to the lowest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_power/rte_power_common.h b/lib/librte_power/rte_power_common.h
new file mode 100644
index 0000000..64bd168
--- /dev/null
+++ b/lib/librte_power/rte_power_common.h
@@ -0,0 +1,39 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_POWER_COMMON_H_
+#define RTE_POWER_COMMON_H_
+
+#define RTE_POWER_INVALID_FREQ_INDEX (~0) /* all bits set; by its name, marks "no valid index" */
+
+#endif /* RTE_POWER_COMMON_H_ */
diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c
new file mode 100644
index 0000000..d8cef98
--- /dev/null
+++ b/lib/librte_power/rte_power_kvm_vm.c
@@ -0,0 +1,160 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <errno.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <rte_config.h>
+
+#include "guest_channel.h"
+#include "channel_commands.h"
+#include "rte_power_kvm_vm.h"
+#include "rte_power_common.h"
+
+#define FD_PATH "/dev/virtio-ports/virtio.serial.port.poweragent" /* given to guest_channel_host_connect() */
+
+#define SEND_MSG_AND_RETURN(pkt, lcore_id, ret) do { \
+		(ret) = guest_channel_send_msg(&(pkt)[(lcore_id)], (lcore_id)); \
+		if ((ret) == 0) \
+			return 1; /* sent; note: returns from the CALLING function */ \
+		if ((ret) > 0) /* positive results are passed to strerror() */ \
+			RTE_LOG(DEBUG, POWER, "Error sending message: %s\n", \
+				strerror(ret)); \
+		return -1; /* every non-zero send result is reported as -1 */ \
+} while (0)
+
+/* One request packet per lcore, indexed by lcore_id. */
+static struct channel_packet pkt[RTE_MAX_LCORE];
+
+int
+rte_power_kvm_vm_init(unsigned lcore_id)
+{ /* Fill pkt[lcore_id], then call guest_channel_host_connect(). */
+	if (lcore_id >= RTE_MAX_LCORE) { /* lcore_id indexes pkt[] below */
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	pkt[lcore_id].command = CPU_POWER;
+	pkt[lcore_id].resource_id = lcore_id;
+	return guest_channel_host_connect(FD_PATH, lcore_id); /* passed through */
+}
+
+int
+rte_power_kvm_vm_exit(unsigned lcore_id)
+{ /* Always returns 0; lcore_id is forwarded without a range check here. */
+	guest_channel_host_disconnect(lcore_id);
+	return 0;
+}
+
+uint32_t
+rte_power_kvm_vm_freqs(__attribute__((unused)) unsigned lcore_id,
+		__attribute__((unused)) uint32_t *freqs,
+		__attribute__((unused)) uint32_t num)
+{ /* Unsupported stub: ignores every argument and only logs an error. */
+	RTE_LOG(ERR, POWER, "rte_power_freqs is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP; /* NOTE(review): converts to a large uint32_t value */
+}
+
+uint32_t
+rte_power_kvm_vm_get_freq(__attribute__((unused)) unsigned lcore_id)
+{ /* Unsupported stub: ignores lcore_id and only logs an error. */
+	RTE_LOG(ERR, POWER, "rte_power_get_freq is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP; /* NOTE(review): converts to a large uint32_t value */
+}
+
+int
+rte_power_kvm_vm_set_freq(__attribute__((unused)) unsigned lcore_id,
+		__attribute__((unused)) uint32_t index)
+{ /* Unsupported stub: ignores both arguments and only logs an error. */
+	RTE_LOG(ERR, POWER, "rte_power_set_freq is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP; /* negative here, since the return type is int */
+}
+
+int
+rte_power_kvm_vm_freq_up(unsigned lcore_id)
+{ /* Mark pkt[lcore_id] as CPU_SCALE_UP and send it. */
+	int ret;
+	if (lcore_id >= RTE_MAX_LCORE) { /* lcore_id indexes pkt[] below */
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	pkt[lcore_id].unit = CPU_SCALE_UP;
+	/* The macro returns from this function: 1 if the send gave 0, else -1. */
+	SEND_MSG_AND_RETURN(pkt, lcore_id , ret);
+}
+
+int
+rte_power_kvm_vm_freq_down(unsigned lcore_id)
+{ /* Mark pkt[lcore_id] as CPU_SCALE_DOWN and send it. */
+	int ret;
+	if (lcore_id >= RTE_MAX_LCORE) { /* lcore_id indexes pkt[] below */
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	pkt[lcore_id].unit = CPU_SCALE_DOWN;
+	/* The macro returns from this function: 1 if the send gave 0, else -1. */
+	SEND_MSG_AND_RETURN(pkt, lcore_id , ret);
+}
+
+int
+rte_power_kvm_vm_freq_max(unsigned lcore_id)
+{ /* Mark pkt[lcore_id] as CPU_SCALE_MAX and send it. */
+	int ret;
+	if (lcore_id >= RTE_MAX_LCORE) { /* lcore_id indexes pkt[] below */
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	pkt[lcore_id].unit = CPU_SCALE_MAX;
+	/* The macro returns from this function: 1 if the send gave 0, else -1. */
+	SEND_MSG_AND_RETURN(pkt, lcore_id , ret);
+}
+
+int
+rte_power_kvm_vm_freq_min(unsigned lcore_id)
+{ /* Mark pkt[lcore_id] as CPU_SCALE_MIN and send it. */
+	int ret;
+	if (lcore_id >= RTE_MAX_LCORE) { /* lcore_id indexes pkt[] below */
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	pkt[lcore_id].unit = CPU_SCALE_MIN;
+	/* The macro returns from this function: 1 if the send gave 0, else -1. */
+	SEND_MSG_AND_RETURN(pkt, lcore_id , ret);
+}
diff --git a/lib/librte_power/rte_power_kvm_vm.h b/lib/librte_power/rte_power_kvm_vm.h
new file mode 100644
index 0000000..dcbc878
--- /dev/null
+++ b/lib/librte_power/rte_power_kvm_vm.h
@@ -0,0 +1,179 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_POWER_KVM_VM_H
+#define _RTE_POWER_KVM_VM_H
+
+/**
+ * @file
+ * RTE Power Management KVM VM
+ */
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize power management for a specific lcore.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_init(unsigned lcore_id);
+
+/**
+ * Exit power management on a specific lcore.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_exit(unsigned lcore_id);
+
+/**
+ * Get the available frequencies of a specific lcore.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param freqs
+ *  The buffer array to save the frequencies.
+ * @param num
+ *  The number of frequencies to get.
+ *
+ * @return
+ *  -ENOTSUP, converted to the unsigned return type (a large positive value).
+ */
+uint32_t rte_power_kvm_vm_freqs(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+/**
+ * Return the current index of available frequencies of a specific lcore.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  -ENOTSUP, converted to the unsigned return type (a large positive value).
+ */
+uint32_t rte_power_kvm_vm_get_freq(unsigned lcore_id);
+
+/**
+ * Set the new frequency for a specific lcore by indicating the index of
+ * available frequencies.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param index
+ *  The index of available frequencies.
+ *
+ * @return
+ *  -ENOTSUP
+ */
+int rte_power_kvm_vm_set_freq(unsigned lcore_id, uint32_t index);
+
+/**
+ * Scale up the frequency of a specific lcore. This request is forwarded to the
+ * host monitor.
+ * Not thread-safe: the caller must serialize calls to this function.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_up(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore according to the available
+ * frequencies.
+ * Not thread-safe: the caller must serialize calls to this function.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_down(unsigned lcore_id);
+
+/**
+ * Scale up the frequency of a specific lcore to the highest according to the
+ * available frequencies.
+ * Not thread-safe: the caller must serialize calls to this function.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_max(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore to the lowest according to the
+ * available frequencies.
+ * Not thread-safe: the caller must serialize calls to this function.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_min(unsigned lcore_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif