Power management fixes for v4.11-rc4

- Make intel_pstate use one set of global P-state limits in the
    active mode regardless of the scaling_governor settings for
    individual CPUs instead of switching back and forth between two
    of them in a way that is hard to control (Rafael Wysocki).
 
  - Drop a useless function from intel_pstate to prevent it from
    modifying the maximum supported frequency value unexpectedly
    which may confuse the cpufreq core (Rafael Wysocki).
 
  - Fix the cpufreq core to restore policy limits on CPU online so
    that the limits are not reset over system suspend/resume, among
    other things (Viresh Kumar).
 
  - Fix the initialization of the schedutil cpufreq governor to
    make the IO-wait boosting mechanism in it actually work on
    systems with one CPU per cpufreq policy (Rafael Wysocki).
 
  - Add a sanity check to the cpuidle core to prevent crashes from
    happening if the architecture code initialization fails to set
    up things as expected (Vaidyanathan Srinivasan).
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJY1HLGAAoJEILEb/54YlRxjAsQAIFcYfJKosA8IlmKcricR/WH
 CqBMCH9S6Y5YsggrFjcj3lVJbf5P43/h3U++O+/97lJsevfp4inCChpvVSQIWX4v
 xzk2v+5Ms8ROqVSLy34yTaB0ysCC//J5FvdQLsj0Zw9W/8yvi0DosPfeiAD7sYeb
 4qkJK3+yv5sLtZ41FmdVYabhC5KHQSAV6p6X+KOZnFV8cm+8TfOSERhStXASMTGc
 tvDpjIjPA1GLpHYdOK4UQ+Er1Hgwk2fNX7eXrpHh7QCQx4eZEN+g7DAC95Ify9Am
 gkTFc5eUfOFKU5KMshdQh6gnfoNaKi4d3E/ahmnU+KQuyKiy4KMyTNcuUBszSaDM
 ZTm0GooseV0UajaLH08BNbfpqDsiKc2fm1qkCQkXxpGjs80/bYn5gK+fpvkziq9x
 210Wc7XTWf7JfmPs0d3gZekaohHtqJVigCuA4dXH6kvbDvbDcfKOza6rqNmpSFrQ
 ifWH6M12Ut/G5NfwihhTRhoKkeQjqHFgikNC8BjF2Myem20026Vr6MKZrubDAlkq
 VWP7lT2zNSs1btsNqDrA9+wejwK8OwwrpfZOx3hbYL6Q+u/AuljIJ79aRz8ROZcE
 jQZeOKprmlAaDIASdxIM4yjwzSQE0l/CaHteHfKaddmcWTrtKR+CEfU0ODzYWoK0
 dcQwyMF3tOToj7BjWqKM
 =jy4C
 -----END PGP SIGNATURE-----

Merge tag 'pm-4.11-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
 "One of these is an intel_pstate regression fix and it is not a small
  change, but it mostly removes code that shouldn't be there. That code
  was acquired by mistake and has been a source of constant pain since
  then, so the time has come to get rid of it finally. We have not seen
  problems with this change in the lab, so fingers crossed.

  The rest is more usual: one more intel_pstate commit removing useless
  code, a cpufreq core fix to make it restore policy limits on CPU
  online (which prevents the limits from being reset over system
  suspend/resume), a schedutil cpufreq governor initialization fix to
  make it actually work as advertised on all systems and an extra sanity
  check in the cpuidle core to prevent crashes from happening if the
  arch code messes things up.

  Specifics:

   - Make intel_pstate use one set of global P-state limits in the
     active mode regardless of the scaling_governor settings for
     individual CPUs instead of switching back and forth between two of
     them in a way that is hard to control (Rafael Wysocki).

   - Drop a useless function from intel_pstate to prevent it from
     modifying the maximum supported frequency value unexpectedly which
     may confuse the cpufreq core (Rafael Wysocki).

   - Fix the cpufreq core to restore policy limits on CPU online so that
     the limits are not reset over system suspend/resume, among other
     things (Viresh Kumar).

   - Fix the initialization of the schedutil cpufreq governor to make
     the IO-wait boosting mechanism in it actually work on systems with
     one CPU per cpufreq policy (Rafael Wysocki).

   - Add a sanity check to the cpuidle core to prevent crashes from
     happening if the architecture code initialization fails to set up
     things as expected (Vaidyanathan Srinivasan)"

* tag 'pm-4.11-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Restore policy min/max limits on CPU online
  cpuidle: Validate cpu_dev in cpuidle_add_sysfs()
  cpufreq: intel_pstate: Fix policy data management in passive mode
  cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start()
  cpufreq: intel_pstate: One set of global limits in active mode
This commit is contained in:
Linus Torvalds 2017-03-23 20:00:39 -07:00
commit ebe64824e9
4 changed files with 72 additions and 130 deletions

View File

@ -1184,6 +1184,9 @@ static int cpufreq_online(unsigned int cpu)
for_each_cpu(j, policy->related_cpus)
per_cpu(cpufreq_cpu_data, j) = policy;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
} else {
policy->min = policy->user_policy.min;
policy->max = policy->user_policy.max;
}
if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {

View File

@ -364,9 +364,7 @@ static bool driver_registered __read_mostly;
static bool acpi_ppc;
#endif
static struct perf_limits performance_limits;
static struct perf_limits powersave_limits;
static struct perf_limits *limits;
static struct perf_limits global;
static void intel_pstate_init_limits(struct perf_limits *limits)
{
@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits)
limits->max_sysfs_pct = 100;
}
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
intel_pstate_init_limits(limits);
limits->min_perf_pct = 100;
limits->min_perf = int_ext_tofp(1);
limits->min_sysfs_pct = 100;
}
static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);
@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
* correct max turbo frequency based on the turbo state.
* Also need to convert to MHz as _PSS freq is in MHz.
*/
if (!limits->turbo_disabled)
if (!global.turbo_disabled)
cpu->acpi_perf_data.states[0].core_frequency =
policy->cpuinfo.max_freq / 1000;
cpu->valid_pss_table = true;
@ -626,7 +616,7 @@ static inline void update_turbo_state(void)
cpu = all_cpu_data[0];
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
limits->turbo_disabled =
global.turbo_disabled =
(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
{
int min, hw_min, max, hw_max, cpu;
struct perf_limits *perf_limits = limits;
struct perf_limits *perf_limits = &global;
u64 value, cap;
for_each_cpu(cpu, policy->cpus) {
@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
hw_min = HWP_LOWEST_PERF(cap);
if (limits->no_turbo)
if (global.no_turbo)
hw_max = HWP_GUARANTEED_PERF(cap);
else
hw_max = HWP_HIGHEST_PERF(cap);
min = fp_ext_toint(hw_max * perf_limits->min_perf);
max = fp_ext_toint(hw_max * perf_limits->max_perf);
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
min = max;
else
min = fp_ext_toint(hw_max * perf_limits->min_perf);
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
max = fp_ext_toint(hw_max * perf_limits->max_perf);
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
}
static void intel_pstate_update_policies(void)
__releases(&intel_pstate_limits_lock)
__acquires(&intel_pstate_limits_lock)
{
struct perf_limits *saved_limits = limits;
int cpu;
mutex_unlock(&intel_pstate_limits_lock);
for_each_possible_cpu(cpu)
cpufreq_update_policy(cpu);
mutex_lock(&intel_pstate_limits_lock);
limits = saved_limits;
}
/************************** debugfs begin ************************/
@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void)
static ssize_t show_##file_name \
(struct kobject *kobj, struct attribute *attr, char *buf) \
{ \
return sprintf(buf, "%u\n", limits->object); \
return sprintf(buf, "%u\n", global.object); \
}
static ssize_t intel_pstate_show_status(char *buf);
@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj,
}
update_turbo_state();
if (limits->turbo_disabled)
ret = sprintf(buf, "%u\n", limits->turbo_disabled);
if (global.turbo_disabled)
ret = sprintf(buf, "%u\n", global.turbo_disabled);
else
ret = sprintf(buf, "%u\n", limits->no_turbo);
ret = sprintf(buf, "%u\n", global.no_turbo);
mutex_unlock(&intel_pstate_driver_lock);
@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
mutex_lock(&intel_pstate_limits_lock);
update_turbo_state();
if (limits->turbo_disabled) {
if (global.turbo_disabled) {
pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
mutex_unlock(&intel_pstate_limits_lock);
mutex_unlock(&intel_pstate_driver_lock);
return -EPERM;
}
limits->no_turbo = clamp_t(int, input, 0, 1);
intel_pstate_update_policies();
global.no_turbo = clamp_t(int, input, 0, 1);
mutex_unlock(&intel_pstate_limits_lock);
intel_pstate_update_policies();
mutex_unlock(&intel_pstate_driver_lock);
return count;
@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
mutex_lock(&intel_pstate_limits_lock);
limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
limits->max_perf_pct = min(limits->max_policy_pct,
limits->max_sysfs_pct);
limits->max_perf_pct = max(limits->min_policy_pct,
limits->max_perf_pct);
limits->max_perf_pct = max(limits->min_perf_pct,
limits->max_perf_pct);
limits->max_perf = percent_ext_fp(limits->max_perf_pct);
intel_pstate_update_policies();
global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
global.max_perf = percent_ext_fp(global.max_perf_pct);
mutex_unlock(&intel_pstate_limits_lock);
intel_pstate_update_policies();
mutex_unlock(&intel_pstate_driver_lock);
return count;
@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
mutex_lock(&intel_pstate_limits_lock);
limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
limits->min_perf_pct = max(limits->min_policy_pct,
limits->min_sysfs_pct);
limits->min_perf_pct = min(limits->max_policy_pct,
limits->min_perf_pct);
limits->min_perf_pct = min(limits->max_perf_pct,
limits->min_perf_pct);
limits->min_perf = percent_ext_fp(limits->min_perf_pct);
intel_pstate_update_policies();
global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
global.min_perf = percent_ext_fp(global.min_perf_pct);
mutex_unlock(&intel_pstate_limits_lock);
intel_pstate_update_policies();
mutex_unlock(&intel_pstate_driver_lock);
return count;
@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate)
u32 vid;
val = (u64)pstate << 8;
if (limits->no_turbo && !limits->turbo_disabled)
if (global.no_turbo && !global.turbo_disabled)
val |= (u64)1 << 32;
vid_fp = cpudata->vid.min + mul_fp(
@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate)
u64 val;
val = (u64)pstate << 8;
if (limits->no_turbo && !limits->turbo_disabled)
if (global.no_turbo && !global.turbo_disabled)
val |= (u64)1 << 32;
return val;
@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
int max_perf = cpu->pstate.turbo_pstate;
int max_perf_adj;
int min_perf;
struct perf_limits *perf_limits = limits;
struct perf_limits *perf_limits = &global;
if (limits->no_turbo || limits->turbo_disabled)
if (global.no_turbo || global.turbo_disabled)
max_perf = cpu->pstate.max_pstate;
if (per_cpu_limits)
@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
sample->busy_scaled = busy_frac * 100;
target = limits->no_turbo || limits->turbo_disabled ?
target = global.no_turbo || global.turbo_disabled ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
target += target >> 2;
target = mul_fp(target, busy_frac);
@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
struct perf_limits *perf_limits = NULL;
struct perf_limits *perf_limits = &global;
if (!policy->cpuinfo.max_freq)
return -ENODEV;
@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
mutex_lock(&intel_pstate_limits_lock);
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
pr_debug("set performance\n");
if (!perf_limits) {
limits = &performance_limits;
perf_limits = limits;
}
} else {
pr_debug("set powersave\n");
if (!perf_limits) {
limits = &powersave_limits;
perf_limits = limits;
}
}
intel_pstate_update_perf_limits(policy, perf_limits);
if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
struct cpudata *cpu = all_cpu_data[policy->cpu];
struct perf_limits *perf_limits;
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
perf_limits = &performance_limits;
else
perf_limits = &powersave_limits;
update_turbo_state();
policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
perf_limits->no_turbo ?
policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
cpu->pstate.max_freq :
cpu->pstate.turbo_freq;
@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
unsigned int max_freq, min_freq;
max_freq = policy->cpuinfo.max_freq *
perf_limits->max_sysfs_pct / 100;
global.max_sysfs_pct / 100;
min_freq = policy->cpuinfo.max_freq *
perf_limits->min_sysfs_pct / 100;
global.min_sysfs_pct / 100;
cpufreq_verify_within_limits(policy, min_freq, max_freq);
}
@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
update_turbo_state();
policy->cpuinfo.max_freq = limits->turbo_disabled ?
policy->cpuinfo.max_freq = global.turbo_disabled ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
return ret;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;
@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
struct cpudata *cpu = all_cpu_data[policy->cpu];
update_turbo_state();
policy->cpuinfo.max_freq = limits->turbo_disabled ?
policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
cpufreq_verify_within_cpu_limits(policy);
@ -2309,26 +2265,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
return 0;
}
static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
struct cpufreq_policy *policy,
unsigned int target_freq)
{
unsigned int max_freq;
update_turbo_state();
max_freq = limits->no_turbo || limits->turbo_disabled ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
policy->cpuinfo.max_freq = max_freq;
if (policy->max > max_freq)
policy->max = max_freq;
if (target_freq > max_freq)
target_freq = max_freq;
return target_freq;
}
static int intel_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@ -2337,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy,
struct cpufreq_freqs freqs;
int target_pstate;
update_turbo_state();
freqs.old = policy->cur;
freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
freqs.new = target_freq;
cpufreq_freq_transition_begin(policy, &freqs);
switch (relation) {
@ -2370,7 +2308,8 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
struct cpudata *cpu = all_cpu_data[policy->cpu];
int target_pstate;
target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
update_turbo_state();
target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
intel_pstate_update_pstate(cpu, target_pstate);
@ -2425,13 +2364,7 @@ static int intel_pstate_register_driver(void)
{
int ret;
intel_pstate_init_limits(&powersave_limits);
intel_pstate_set_performance_limits(&performance_limits);
if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
intel_pstate_driver == &intel_pstate)
limits = &performance_limits;
else
limits = &powersave_limits;
intel_pstate_init_limits(&global);
ret = cpufreq_register_driver(intel_pstate_driver);
if (ret) {

View File

@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
int error;
/*
* Return if cpu_device is not setup for this CPU.
*
* This could happen if the arch did not set up cpu_device
* since this CPU is not in cpu_present mask and the
* driver did not send a correct CPU mask during registration.
* Without this check we would end up passing bogus
* value for &cpu_dev->kobj in kobject_init_and_add()
*/
if (!cpu_dev)
return -ENODEV;
kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
if (!kdev)
return -ENOMEM;

View File

@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy)
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->sg_policy = sg_policy;
if (policy_is_shared(policy)) {
sg_cpu->util = 0;
sg_cpu->max = 0;
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
sg_cpu->iowait_boost = 0;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_shared);
} else {
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_single);
}
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
policy_is_shared(policy) ?
sugov_update_shared :
sugov_update_single);
}
return 0;
}