From patchwork Wed Aug 24 19:28:59 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Markus Theil X-Patchwork-Id: 115395 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D8A10A00C5; Wed, 24 Aug 2022 21:29:48 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 78B6340DFD; Wed, 24 Aug 2022 21:29:48 +0200 (CEST) Received: from smail.rz.tu-ilmenau.de (smail.rz.tu-ilmenau.de [141.24.186.67]) by mails.dpdk.org (Postfix) with ESMTP id BB82140DDE for ; Wed, 24 Aug 2022 21:29:46 +0200 (CEST) Received: from localhost.localdomain (p5dd10db2.dip0.t-ipconnect.de [93.209.13.178]) by smail.rz.tu-ilmenau.de (Postfix) with ESMTPA id 41C83580096; Wed, 24 Aug 2022 21:29:46 +0200 (CEST) From: Markus Theil To: David Hunt Cc: dev@dpdk.org, Markus Theil Subject: [PATCH] power: add unpriv. read of turbo % for pstate Date: Wed, 24 Aug 2022 21:28:59 +0200 Message-Id: <20220824192859.66095-1-markus.theil@tu-ilmenau.de> X-Mailer: git-send-email 2.37.2 MIME-Version: 1.0 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org If DPDK applications are to be run with a minimal set of privileges, using the msr kernel module on Linux should not be necessary. Since at least kernel 4.4, the rdmsr call to obtain the last non-turbo boost frequency can be left out if the sysfs interface is used. Also, RHEL 7 with recent kernel updates should include the sysfs interface for this (I only looked this up for CentOS 7). 
Signed-off-by: Markus Theil Tested-By: David Hunt Acked-By: David Hunt --- lib/power/power_pstate_cpufreq.c | 69 ++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/lib/power/power_pstate_cpufreq.c b/lib/power/power_pstate_cpufreq.c index 78c9197695..c3d66a8f68 100644 --- a/lib/power/power_pstate_cpufreq.c +++ b/lib/power/power_pstate_cpufreq.c @@ -35,15 +35,9 @@ "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq" #define POWER_SYSFILE_BASE_FREQ \ "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" +#define POWER_SYSFILE_TURBO_PCT \ + "/sys/devices/system/cpu/intel_pstate/turbo_pct" #define POWER_PSTATE_DRIVER "intel_pstate" -#define POWER_MSR_PATH "/dev/cpu/%u/msr" - -/* - * MSR related - */ -#define PLATFORM_INFO 0x0CE -#define NON_TURBO_MASK 0xFF00 -#define NON_TURBO_OFFSET 0x8 enum power_state { @@ -74,37 +68,33 @@ struct pstate_power_info { static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE]; /** - * It is to read the specific MSR. 
+ * It is to read the turbo mode percentage from sysfs */ - static int32_t -power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id) +power_read_turbo_pct(uint64_t *outVal) { int fd, ret; - char fullpath[PATH_MAX]; + char val[4] = {0}; - snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id); - - fd = open(fullpath, O_RDONLY); + fd = open(POWER_SYSFILE_TURBO_PCT, O_RDONLY); if (fd < 0) { - RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath, + RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", POWER_SYSFILE_TURBO_PCT, strerror(errno)); return fd; } - ret = pread(fd, val, sizeof(uint64_t), msr); + ret = read(fd, val, sizeof(val)); if (ret < 0) { - RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath, + RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", POWER_SYSFILE_TURBO_PCT, strerror(errno)); goto out; } - POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n", - fullpath, msr, lcore_id); + *outVal = (uint64_t) atol(val); - POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val); + POWER_DEBUG_TRACE("power turbo pct: %"PRIu64"\n", *outVal); out: close(fd); return ret; @@ -116,8 +106,9 @@ out: close(fd); static int power_init_for_setting_freq(struct pstate_power_info *pi) { - FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL; - uint32_t base_ratio, base_max_ratio; + FILE *f_base = NULL, *f_base_min = NULL, *f_base_max = NULL, + *f_min = NULL, *f_max = NULL; + uint32_t base_ratio, base_min_ratio, base_max_ratio; uint64_t max_non_turbo; int ret; @@ -130,6 +121,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi) goto err; } + open_core_sysfs_file(&f_base_min, "r", POWER_SYSFILE_BASE_MIN_FREQ, + pi->lcore_id); + if (f_base_min == NULL) { + RTE_LOG(ERR, POWER, "failed to open %s\n", + POWER_SYSFILE_BASE_MIN_FREQ); + goto err; + } + open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ, pi->lcore_id); if (f_min == NULL) { @@ -158,6 +157,14 @@ power_init_for_setting_freq(struct pstate_power_info *pi) 
goto err; } + /* read base min ratio */ + ret = read_core_sysfs_u32(f_base_min, &base_min_ratio); + if (ret < 0) { + RTE_LOG(ERR, POWER, "Failed to read %s\n", + POWER_SYSFILE_BASE_MIN_FREQ); + goto err; + } + /* base ratio may not exist */ if (f_base != NULL) { ret = read_core_sysfs_u32(f_base, &base_ratio); @@ -170,20 +177,22 @@ power_init_for_setting_freq(struct pstate_power_info *pi) base_ratio = 0; } - /* Add MSR read to detect turbo status */ - if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0) - goto err; - /* no errors after this point */ - /* convert ratios to bins */ base_max_ratio /= BUS_FREQ; + base_min_ratio /= BUS_FREQ; base_ratio /= BUS_FREQ; /* assign file handles */ pi->f_cur_min = f_min; pi->f_cur_max = f_max; - max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET; + /* try to get turbo from global sysfs entry for less privileges than from MSR */ + if (power_read_turbo_pct(&max_non_turbo) < 0) + goto err; + /* no errors after this point */ + + max_non_turbo = base_min_ratio + + (100 - max_non_turbo) * (base_max_ratio - base_min_ratio) / 100; POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo); @@ -220,6 +229,8 @@ power_init_for_setting_freq(struct pstate_power_info *pi) err: if (f_base != NULL) fclose(f_base); + if (f_base_min != NULL) + fclose(f_base_min); if (f_base_max != NULL) fclose(f_base_max); if (f_min != NULL)