From 52b43bbdb6d8ab73f6f5db1dbcab4e410f58a2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 8 Mar 2024 09:51:14 +0100 Subject: [PATCH 1/7] powercap: intel_rapl: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_msr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index b4b6930cacb0..35cb152fa9aa 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -197,11 +197,10 @@ out: return ret; } -static int rapl_msr_remove(struct platform_device *pdev) +static void rapl_msr_remove(struct platform_device *pdev) { cpuhp_remove_state(rapl_msr_priv->pcap_rapl_online); powercap_unregister_control_type(rapl_msr_priv->control_type); - return 0; } static const struct platform_device_id rapl_msr_ids[] = { @@ -212,7 +211,7 @@ MODULE_DEVICE_TABLE(platform, rapl_msr_ids); static struct platform_driver intel_rapl_msr_driver = { .probe = rapl_msr_probe, - .remove = rapl_msr_remove, + .remove_new = rapl_msr_remove, .id_table = rapl_msr_ids, .driver = { .name = "intel_rapl_msr", From 3acec69a94eaaf3d7ebb043e1db45cc26f58a847 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Fri, 8 Mar 2024 12:32:03 +0000 Subject: [PATCH 2/7] PM: EM: Force device drivers to provide power in uW The EM only supports power in uW. Make sure that it is not possible to register some downstream driver which doesn't provide power in uW. The only exception is artificial EM, but that EM is ignored by the rest of kernel frameworks (thermal, powercap, etc). Reported-by: PoShao Chen Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index b686ac0345bd..9e1c9aa399ea 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -612,6 +612,17 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, else if (cb->get_cost) flags |= EM_PERF_DOMAIN_ARTIFICIAL; + /* + * EM only supports uW (exception is artificial EM). + * Therefore, check and force the drivers to provide + * power in uW. + */ + if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) { + dev_err(dev, "EM: only supports uW power values\n"); + ret = -EINVAL; + goto unlock; + } + ret = em_create_pd(dev, nr_states, cb, cpus, flags); if (ret) goto unlock; From d61120b4623e24e863a455c1e11085fcb7653d08 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 8 Mar 2024 14:44:50 -0800 Subject: [PATCH 3/7] Documentation: power: Fix typo in suspend and interrupts doc Typos are bad. Fix them. Signed-off-by: Saravana Kannan Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. Wysocki --- Documentation/power/suspend-and-interrupts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/suspend-and-interrupts.rst b/Documentation/power/suspend-and-interrupts.rst index dfbace2f4600..f588feeecad0 100644 --- a/Documentation/power/suspend-and-interrupts.rst +++ b/Documentation/power/suspend-and-interrupts.rst @@ -78,7 +78,7 @@ handling the given IRQ as a system wakeup interrupt line and disable_irq_wake() turns that logic off. Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ -in a special way. Namely, the IRQ remains enabled, by on the first interrupt +in a special way. Namely, the IRQ remains enabled, but on the first interrupt it will be disabled, marked as pending and "suspended" so that it will be re-enabled by resume_device_irqs() during the subsequent system resume. Also the PM core is notified about the event which causes the system suspend in From f37a4d6b4a2c77414e8b9d25dd5ee31537ce9b00 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 12 Mar 2024 16:07:23 +0530 Subject: [PATCH 4/7] cpufreq: Fix per-policy boost behavior on SoCs using cpufreq_boost_set_sw() In the existing code, per-policy flags don't have any impact i.e. if cpufreq_driver boost is enabled and boost is disabled for one or more of the policies, the cpufreq driver will behave as if boost is enabled. Fix this by incorporating per-policy boost flag in the policy->max computation used in cpufreq_frequency_table_cpuinfo and setting the default per-policy boost to mirror the cpufreq_driver boost flag. Fixes: 218a06a79d9a ("cpufreq: Support per-policy performance boost") Reported-by: Dietmar Eggemann Reviewed-by: Viresh Kumar Reviewed-by: Dhruva Gole Signed-off-by: Sibi Sankar Tested-by:Yipeng Zou Reviewed-by: Yipeng Zou Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 20 +++++++++++++------- drivers/cpufreq/freq_table.c | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f6f8d7f450e7..66e10a19d76a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -653,14 +653,16 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy, if (policy->boost_enabled == enable) return count; + policy->boost_enabled = enable; + cpus_read_lock(); ret = cpufreq_driver->set_boost(policy, enable); cpus_read_unlock(); - if (ret) + if (ret) { + policy->boost_enabled = !policy->boost_enabled; return ret; - - policy->boost_enabled = enable; + } return count; } @@ -1428,6 +1430,9 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + /* * The initialization has succeeded and the policy is online. * If there is a problem with its frequency table, take it @@ -2769,11 +2774,12 @@ int cpufreq_boost_trigger_state(int state) cpus_read_lock(); for_each_active_policy(policy) { - ret = cpufreq_driver->set_boost(policy, state); - if (ret) - goto err_reset_state; - policy->boost_enabled = state; + ret = cpufreq_driver->set_boost(policy, state); + if (ret) { + policy->boost_enabled = !policy->boost_enabled; + goto err_reset_state; + } } cpus_read_unlock(); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c4d4643b6ca6..c17dc51a5a02 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -40,7 +40,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry(pos, table) { freq = pos->frequency; - if (!cpufreq_boost_enabled() + if ((!cpufreq_boost_enabled() || !policy->boost_enabled) && (pos->flags & CPUFREQ_BOOST_FREQ)) continue; From d2399501c2c081eac703ca9597ceb83c7875a537 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Mar 2024 13:54:57 +0100 Subject: [PATCH 5/7] cpufreq: dt: always allocate zeroed cpumask Commit 0499a78369ad ("ARM64: Dynamically allocate cpumasks and increase supported CPUs to 512") changed the handling of cpumasks on ARM 64bit, what resulted in the strange issues and warnings during cpufreq-dt initialization on some big.LITTLE platforms. This was caused by mixing OPPs between big and LITTLE cores, because OPP-sharing information between big and LITTLE cores is computed on cpumask, which in turn was not zeroed on allocation. Fix this by switching to zalloc_cpumask_var() call. Fixes: dc279ac6e5b4 ("cpufreq: dt: Refactor initialization to handle probe deferral properly") CC: stable@vger.kernel.org # v5.10+ Signed-off-by: Marek Szyprowski Reviewed-by: Christoph Lameter (Ampere) Reviewed-by: Dhruva Gole Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 8bd6e5e8f121..2d83bbc65dd0 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -208,7 +208,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) if (!priv) return -ENOMEM; - if (!alloc_cpumask_var(&priv->cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&priv->cpus, GFP_KERNEL)) return -ENOMEM; cpumask_set_cpu(cpu, priv->cpus); From a897575e79d7a2ec79abe942b50ecd1d1cdd821b Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 12 Mar 2024 15:17:25 +0530 Subject: [PATCH 6/7] firmware: arm_scmi: Add support for marking certain frequencies as turbo All opps above the sustained frequency are treated as turbo, so mark them accordingly. Suggested-by: Sudeep Holla Reviewed-by: Sudeep Holla Signed-off-by: Sibi Sankar Reviewed-by: Dhruva Gole Signed-off-by: Viresh Kumar --- drivers/firmware/arm_scmi/perf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index be1801fb848d..280b29c0a903 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -819,6 +819,9 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, else freq = dom->opp[idx].indicative_freq * dom->mult_factor; + /* All OPPs above the sustained frequency are treated as turbo */ + data.turbo = freq > dom->sustained_freq_khz * 1000; + data.level = dom->opp[idx].perf; data.freq = freq; From a8e949d41c72a0a860b1c3571d6b274e9b77b6bb Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 12 Mar 2024 15:17:26 +0530 Subject: [PATCH 7/7] cpufreq: scmi: Enable boost support Certain platforms host a number of higher OPPs that are exclusive to CPUs within specific CPUfreq policies and not all CPUs within that CPUfreq policy are able to achieve those higher OPPs due to power constraints. These OPPs are marked as turbo in the freq_table and in the presence of such OPPs, let's enable boost by default. Reviewed-by: Sudeep Holla Signed-off-by: Sibi Sankar Reviewed-by: Dhruva Gole Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 0b483bd0d3ca..3b4f6bfb2f4c 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -30,6 +30,7 @@ struct scmi_data { static struct scmi_protocol_handle *ph; static const struct scmi_perf_proto_ops *perf_ops; +static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { @@ -167,6 +168,12 @@ scmi_get_rate_limit(u32 domain, bool has_fast_switch) return rate_limit; } +static struct freq_attr *scmi_cpufreq_hw_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, + NULL, +}; + static int scmi_cpufreq_init(struct cpufreq_policy *policy) { int ret, nr_opp, domain; @@ -276,6 +283,17 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->transition_delay_us = scmi_get_rate_limit(domain, policy->fast_switch_possible); + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) { + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + goto out_free_opp; + } else { + scmi_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + scmi_cpufreq_driver.boost_enabled = true; + } + } + return 0; out_free_opp: @@ -334,7 +352,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = { CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, - .attr = cpufreq_generic_attr, + .attr = scmi_cpufreq_hw_attr, .target_index = scmi_cpufreq_set_target, .fast_switch = scmi_cpufreq_fast_switch, .get = scmi_cpufreq_get_rate,