mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-02 03:44:36 +08:00
4ecc933b7d
The current EPB "normal" is defined as 6 and set whenever power-up EPB value is 0. This setting resulted in the desired out of box power and performance for several CPU generations. But this value is not suitable for AlderLake mobile CPUs, as this resulted in higher uncore power. Since EPB is model specific, this is not unreasonable to have different behavior. Allow a capability where "normal" EPB can be redefined. For AlderLake mobile CPUs this desired normal value is 7. Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
236 lines
6.6 KiB
C
236 lines
6.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Intel Performance and Energy Bias Hint support.
|
|
*
|
|
* Copyright (C) 2019 Intel Corporation
|
|
*
|
|
* Author:
|
|
* Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
|
*/
|
|
|
|
#include <linux/cpuhotplug.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/device.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/syscore_ops.h>
|
|
#include <linux/pm.h>
|
|
|
|
#include <asm/cpu_device_id.h>
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/msr.h>
|
|
|
|
/**
|
|
* DOC: overview
|
|
*
|
|
* The Performance and Energy Bias Hint (EPB) allows software to specify its
|
|
* preference with respect to the power-performance tradeoffs present in the
|
|
* processor. Generally, the EPB is expected to be set by user space (directly
|
|
* via sysfs or with the help of the x86_energy_perf_policy tool), but there are
|
|
* two reasons for the kernel to update it.
|
|
*
|
|
* First, there are systems where the platform firmware resets the EPB during
|
|
* system-wide transitions from sleep states back into the working state
|
|
* effectively causing the previous EPB updates by user space to be lost.
|
|
* Thus the kernel needs to save the current EPB values for all CPUs during
|
|
* system-wide transitions to sleep states and restore them on the way back to
|
|
* the working state. That can be achieved by saving EPB for secondary CPUs
|
|
* when they are taken offline during transitions into system sleep states and
|
|
* for the boot CPU in a syscore suspend operation, so that it can be restored
|
|
* for the boot CPU in a syscore resume operation and for the other CPUs when
|
|
* they are brought back online. However, CPUs that are already offline when
|
|
* a system-wide PM transition is started are not taken offline again, but their
|
|
* EPB values may still be reset by the platform firmware during the transition,
|
|
* so in fact it is necessary to save the EPB of any CPU taken offline and to
|
|
* restore it when the given CPU goes back online at all times.
|
|
*
|
|
* Second, on many systems the initial EPB value coming from the platform
|
|
* firmware is 0 ('performance') and at least on some of them that is because
|
|
* the platform firmware does not initialize EPB at all with the assumption that
|
|
* the OS will do that anyway. That sometimes is problematic, as it may cause
|
|
* the system battery to drain too fast, for example, so it is better to adjust
|
|
* it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
|
|
* kernel changes it to the 'normal' value (6 by default; CPU model specific
* overrides exist, e.g. 7 on Alder Lake mobile).
|
|
*/
|
|
|
|
/*
 * Per-CPU copy of the EPB value captured by intel_epb_save(), stored as
 * (EPB & EPB_MASK) | EPB_SAVED.  Zero means that nothing has been saved for
 * the CPU yet, which intel_epb_restore() uses to detect first-time bring-up.
 */
static DEFINE_PER_CPU(u8, saved_epb);
|
|
|
|
/* Bits of MSR_IA32_ENERGY_PERF_BIAS that this file treats as the EPB value. */
#define EPB_MASK	0x0fULL
/* Marker bit OR-ed into saved_epb so that a saved value is never zero. */
#define EPB_SAVED	(1ULL << 4)
/* Largest numeric EPB value accepted through sysfs. */
#define MAX_EPB		EPB_MASK
|
|
|
|
/*
 * Indices of the named EPB levels.  The same index selects the sysfs name in
 * energy_perf_strings[] and the raw EPB value in energ_perf_values[].
 */
enum energy_perf_value_index {
	EPB_INDEX_PERFORMANCE = 0,
	EPB_INDEX_BALANCE_PERFORMANCE,
	EPB_INDEX_NORMAL,
	EPB_INDEX_BALANCE_POWERSAVE,
	EPB_INDEX_POWERSAVE,
};
|
|
|
|
static u8 energ_perf_values[] = {
|
|
[EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE,
|
|
[EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
|
|
[EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL,
|
|
[EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
|
|
[EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE,
|
|
};
|
|
|
|
static int intel_epb_save(void)
|
|
{
|
|
u64 epb;
|
|
|
|
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
|
/*
|
|
* Ensure that saved_epb will always be nonzero after this write even if
|
|
* the EPB value read from the MSR is 0.
|
|
*/
|
|
this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void intel_epb_restore(void)
|
|
{
|
|
u64 val = this_cpu_read(saved_epb);
|
|
u64 epb;
|
|
|
|
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
|
if (val) {
|
|
val &= EPB_MASK;
|
|
} else {
|
|
/*
|
|
* Because intel_epb_save() has not run for the current CPU yet,
|
|
* it is going online for the first time, so if its EPB value is
|
|
* 0 ('performance') at this point, assume that it has not been
|
|
* initialized by the platform firmware and set it to 6
|
|
* ('normal').
|
|
*/
|
|
val = epb & EPB_MASK;
|
|
if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
|
|
val = energ_perf_values[EPB_INDEX_NORMAL];
|
|
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
|
}
|
|
}
|
|
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
|
|
}
|
|
|
|
static struct syscore_ops intel_epb_syscore_ops = {
|
|
.suspend = intel_epb_save,
|
|
.resume = intel_epb_restore,
|
|
};
|
|
|
|
static const char * const energy_perf_strings[] = {
|
|
[EPB_INDEX_PERFORMANCE] = "performance",
|
|
[EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance",
|
|
[EPB_INDEX_NORMAL] = "normal",
|
|
[EPB_INDEX_BALANCE_POWERSAVE] = "balance-power",
|
|
[EPB_INDEX_POWERSAVE] = "power",
|
|
};
|
|
|
|
static ssize_t energy_perf_bias_show(struct device *dev,
|
|
struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
unsigned int cpu = dev->id;
|
|
u64 epb;
|
|
int ret;
|
|
|
|
ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
return sprintf(buf, "%llu\n", epb);
|
|
}
|
|
|
|
/*
 * sysfs store callback for the energy_perf_bias attribute.
 *
 * @buf is either one of the names in energy_perf_strings[] or a number in the
 * range 0..MAX_EPB (base detected by kstrtou64()).  The resulting value
 * replaces the EPB_MASK bits of MSR_IA32_ENERGY_PERF_BIAS on the CPU
 * identified by dev->id; all other MSR bits are written back unchanged.
 *
 * Returns @count on success, -EINVAL for unparsable or out-of-range input, or
 * the negative error code from the MSR read/write helpers.
 */
static ssize_t energy_perf_bias_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	unsigned int cpu = dev->id;
	u64 msr, bias;
	int idx, err;

	/* Try the symbolic names first, then fall back to a plain number. */
	idx = __sysfs_match_string(energy_perf_strings,
				   ARRAY_SIZE(energy_perf_strings), buf);
	if (idx < 0) {
		if (kstrtou64(buf, 0, &bias) || bias > MAX_EPB)
			return -EINVAL;
	} else {
		bias = energ_perf_values[idx];
	}

	err = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
	if (err < 0)
		return err;

	err = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
			    (msr & ~EPB_MASK) | bias);
	if (err < 0)
		return err;

	return count;
}
|
|
|
|
/*
 * Declares dev_attr_energy_perf_bias (referenced by intel_epb_attrs[]);
 * DEVICE_ATTR_RW() binds it by name to energy_perf_bias_show() and
 * energy_perf_bias_store().
 */
static DEVICE_ATTR_RW(energy_perf_bias);
|
|
|
|
static struct attribute *intel_epb_attrs[] = {
|
|
&dev_attr_energy_perf_bias.attr,
|
|
NULL
|
|
};
|
|
|
|
static const struct attribute_group intel_epb_attr_group = {
|
|
.name = power_group_name,
|
|
.attrs = intel_epb_attrs
|
|
};
|
|
|
|
static int intel_epb_online(unsigned int cpu)
|
|
{
|
|
struct device *cpu_dev = get_cpu_device(cpu);
|
|
|
|
intel_epb_restore();
|
|
if (!cpuhp_tasks_frozen)
|
|
sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int intel_epb_offline(unsigned int cpu)
|
|
{
|
|
struct device *cpu_dev = get_cpu_device(cpu);
|
|
|
|
if (!cpuhp_tasks_frozen)
|
|
sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
|
|
|
|
intel_epb_save();
|
|
return 0;
|
|
}
|
|
|
|
static const struct x86_cpu_id intel_epb_normal[] = {
|
|
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 7),
|
|
{}
|
|
};
|
|
|
|
static __init int intel_epb_init(void)
|
|
{
|
|
const struct x86_cpu_id *id = x86_match_cpu(intel_epb_normal);
|
|
int ret;
|
|
|
|
if (!boot_cpu_has(X86_FEATURE_EPB))
|
|
return -ENODEV;
|
|
|
|
if (id)
|
|
energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data;
|
|
|
|
ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
|
|
"x86/intel/epb:online", intel_epb_online,
|
|
intel_epb_offline);
|
|
if (ret < 0)
|
|
goto err_out_online;
|
|
|
|
register_syscore_ops(&intel_epb_syscore_ops);
|
|
return 0;
|
|
|
|
err_out_online:
|
|
cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
|
|
return ret;
|
|
}
|
|
/* Run intel_epb_init() at the subsys initcall level during boot. */
subsys_initcall(intel_epb_init);
|