linux/drivers/base/power/runtime.c

1823 lines
50 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/runtime.c - Helper functions for device runtime PM
*
* Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
* Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu>
*/
#include <linux/sched/mm.h>
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/export.h>
#include <linux/pm_runtime.h>
#include <linux/pm_wakeirq.h>
#include <trace/events/rpm.h>
#include "../base.h"
#include "power.h"
typedef int (*pm_callback_t)(struct device *);
static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset)
{
pm_callback_t cb;
const struct dev_pm_ops *ops;
if (dev->pm_domain)
ops = &dev->pm_domain->ops;
else if (dev->type && dev->type->pm)
ops = dev->type->pm;
else if (dev->class && dev->class->pm)
ops = dev->class->pm;
else if (dev->bus && dev->bus->pm)
ops = dev->bus->pm;
else
ops = NULL;
if (ops)
cb = *(pm_callback_t *)((void *)ops + cb_offset);
else
cb = NULL;
if (!cb && dev->driver && dev->driver->pm)
cb = *(pm_callback_t *)((void *)dev->driver->pm + cb_offset);
return cb;
}
#define RPM_GET_CALLBACK(dev, callback) \
__rpm_get_callback(dev, offsetof(struct dev_pm_ops, callback))
static int rpm_resume(struct device *dev, int rpmflags);
static int rpm_suspend(struct device *dev, int rpmflags);
/**
* update_pm_runtime_accounting - Update the time accounting of power states
* @dev: Device to update the accounting for
*
* In order to be able to have time accounting of the various power states
* (as used by programs such as PowerTOP to show the effectiveness of runtime
* PM), we need to track the time spent in each state.
* update_pm_runtime_accounting must be called each time before the
* runtime_status field is updated, to account the time in the old state
* correctly.
*/
static void update_pm_runtime_accounting(struct device *dev)
{
u64 now, last, delta;
if (dev->power.disable_depth > 0)
return;
last = dev->power.accounting_timestamp;
now = ktime_get_mono_fast_ns();
dev->power.accounting_timestamp = now;
/*
* Because ktime_get_mono_fast_ns() is not monotonic during
* timekeeping updates, ensure that 'now' is after the last saved
* timesptamp.
*/
if (now < last)
return;
delta = now - last;
if (dev->power.runtime_status == RPM_SUSPENDED)
dev->power.suspended_time += delta;
else
dev->power.active_time += delta;
}
static void __update_runtime_status(struct device *dev, enum rpm_status status)
{
update_pm_runtime_accounting(dev);
dev->power.runtime_status = status;
}
static u64 rpm_get_accounted_time(struct device *dev, bool suspended)
{
u64 time;
unsigned long flags;
spin_lock_irqsave(&dev->power.lock, flags);
update_pm_runtime_accounting(dev);
time = suspended ? dev->power.suspended_time : dev->power.active_time;
spin_unlock_irqrestore(&dev->power.lock, flags);
return time;
}
u64 pm_runtime_active_time(struct device *dev)
{
return rpm_get_accounted_time(dev, false);
}
u64 pm_runtime_suspended_time(struct device *dev)
{
return rpm_get_accounted_time(dev, true);
}
EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
/**
* pm_runtime_deactivate_timer - Deactivate given device's suspend timer.
* @dev: Device to handle.
*/
static void pm_runtime_deactivate_timer(struct device *dev)
{
if (dev->power.timer_expires > 0) {
hrtimer_try_to_cancel(&dev->power.suspend_timer);
dev->power.timer_expires = 0;
}
}
/**
* pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests.
* @dev: Device to handle.
*/
static void pm_runtime_cancel_pending(struct device *dev)
{
pm_runtime_deactivate_timer(dev);
/*
* In case there's a request pending, make sure its work function will
* return without doing anything.
*/
dev->power.request = RPM_REQ_NONE;
}
/*
* pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time.
* @dev: Device to handle.
*
* Compute the autosuspend-delay expiration time based on the device's
* power.last_busy time. If the delay has already expired or is disabled
* (negative) or the power.use_autosuspend flag isn't set, return 0.
* Otherwise return the expiration time in nanoseconds (adjusted to be nonzero).
*
* This function may be called either with or without dev->power.lock held.
* Either way it can be racy, since power.last_busy may be updated at any time.
*/
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
u64 pm_runtime_autosuspend_expiration(struct device *dev)
{
int autosuspend_delay;
u64 expires;
if (!dev->power.use_autosuspend)
return 0;
locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-10-24 05:07:29 +08:00
autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay);
if (autosuspend_delay < 0)
return 0;
expires = READ_ONCE(dev->power.last_busy);
expires += (u64)autosuspend_delay * NSEC_PER_MSEC;
if (expires > ktime_get_mono_fast_ns())
return expires; /* Expires in the future */
return 0;
}
EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
static int dev_memalloc_noio(struct device *dev, void *data)
{
return dev->power.memalloc_noio;
}
/*
* pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag.
* @dev: Device to handle.
* @enable: True for setting the flag and False for clearing the flag.
*
* Set the flag for all devices in the path from the device to the
* root device in the device tree if @enable is true, otherwise clear
* the flag for devices in the path whose siblings don't set the flag.
*
* The function should only be called by block device, or network
* device driver for solving the deadlock problem during runtime
* resume/suspend:
*
* If memory allocation with GFP_KERNEL is called inside runtime
* resume/suspend callback of any one of its ancestors(or the
* block device itself), the deadlock may be triggered inside the
* memory allocation since it might not complete until the block
* device becomes active and the involed page I/O finishes. The
* situation is pointed out first by Alan Stern. Network device
* are involved in iSCSI kind of situation.
*
* The lock of dev_hotplug_mutex is held in the function for handling
* hotplug race because pm_runtime_set_memalloc_noio() may be called
* in async probe().
*
* The function should be called between device_add() and device_del()
* on the affected device(block/network device).
*/
void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
{
static DEFINE_MUTEX(dev_hotplug_mutex);
mutex_lock(&dev_hotplug_mutex);
for (;;) {
bool enabled;
/* hold power lock since bitfield is not SMP-safe. */
spin_lock_irq(&dev->power.lock);
enabled = dev->power.memalloc_noio;
dev->power.memalloc_noio = enable;
spin_unlock_irq(&dev->power.lock);
/*
* not need to enable ancestors any more if the device
* has been enabled.
*/
if (enabled && enable)
break;
dev = dev->parent;
/*
* clear flag of the parent device only if all the
* children don't set the flag because ancestor's
* flag was set by any one of the descendants.
*/
if (!dev || (!enable &&
device_for_each_child(dev, NULL,
dev_memalloc_noio)))
break;
}
mutex_unlock(&dev_hotplug_mutex);
}
EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);
/**
* rpm_check_suspend_allowed - Test whether a device may be suspended.
* @dev: Device to test.
*/
static int rpm_check_suspend_allowed(struct device *dev)
{
int retval = 0;
if (dev->power.runtime_error)
retval = -EINVAL;
else if (dev->power.disable_depth > 0)
retval = -EACCES;
else if (atomic_read(&dev->power.usage_count) > 0)
retval = -EAGAIN;
else if (!dev->power.ignore_children &&
atomic_read(&dev->power.child_count))
retval = -EBUSY;
/* Pending resume requests take precedence over suspends. */
else if ((dev->power.deferred_resume
&& dev->power.runtime_status == RPM_SUSPENDING)
|| (dev->power.request_pending
&& dev->power.request == RPM_REQ_RESUME))
retval = -EAGAIN;
else if (__dev_pm_qos_resume_latency(dev) == 0)
retval = -EPERM;
else if (dev->power.runtime_status == RPM_SUSPENDED)
retval = 1;
return retval;
}
static int rpm_get_suppliers(struct device *dev)
{
struct device_link *link;
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held()) {
int retval;
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
continue;
retval = pm_runtime_get_sync(link->supplier);
/* Ignore suppliers with disabled runtime PM. */
if (retval < 0 && retval != -EACCES) {
pm_runtime_put_noidle(link->supplier);
return retval;
}
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
refcount_inc(&link->rpm_active);
}
return 0;
}
static void rpm_put_suppliers(struct device *dev)
{
struct device_link *link;
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held()) {
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
continue;
while (refcount_dec_not_one(&link->rpm_active))
pm_runtime_put(link->supplier);
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
}
}
/**
* __rpm_callback - Run a given runtime PM callback for a given device.
* @cb: Runtime PM callback to run.
* @dev: Device to run the callback for.
*/
static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
int retval, idx;
bool use_links = dev->power.links_count > 0;
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
} else {
spin_unlock_irq(&dev->power.lock);
/*
* Resume suppliers if necessary.
*
* The device's runtime PM status cannot change until this
* routine returns, so it is safe to read the status outside of
* the lock.
*/
if (use_links && dev->power.runtime_status == RPM_RESUMING) {
idx = device_links_read_lock();
retval = rpm_get_suppliers(dev);
if (retval)
goto fail;
device_links_read_unlock(idx);
}
}
retval = cb(dev);
if (dev->power.irq_safe) {
spin_lock(&dev->power.lock);
} else {
/*
* If the device is suspending and the callback has returned
* success, drop the usage counters of the suppliers that have
* been reference counted on its resume.
*
* Do that if resume fails too.
*/
if (use_links
&& ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
|| (dev->power.runtime_status == RPM_RESUMING && retval))) {
idx = device_links_read_lock();
fail:
rpm_put_suppliers(dev);
device_links_read_unlock(idx);
}
spin_lock_irq(&dev->power.lock);
}
return retval;
}
/**
* rpm_idle - Notify device bus type if the device can be suspended.
* @dev: Device to notify the bus type about.
* @rpmflags: Flag bits.
*
* Check if the device's runtime PM status allows it to be suspended. If
* another idle notification has been started earlier, return immediately. If
* the RPM_ASYNC flag is set then queue an idle-notification request; otherwise
* run the ->runtime_idle() callback directly. If the ->runtime_idle callback
* doesn't exist or if it returns 0, call rpm_suspend with the RPM_AUTO flag.
*
* This function must be called under dev->power.lock with interrupts disabled.
*/
static int rpm_idle(struct device *dev, int rpmflags)
{
int (*callback)(struct device *);
int retval;
PM / runtime: Add _rcuidle suffix to allow rpm_idle() use from idle This commit appends a few _rcuidle suffixes to fix the following RCU-used-from-idle bug: > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1116 Not tainted > ------------------------------- > include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052cc2c>] __rpm_callback+0x58/0x60 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [<c0110290>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) > [<c010c3a8>] (show_stack) from [<c047fd68>] (dump_stack+0xb0/0xe4) > [<c047fd68>] (dump_stack) from [<c052d5d0>] (rpm_suspend+0x580/0x768) > [<c052d5d0>] (rpm_suspend) from [<c052ec58>] (__pm_runtime_suspend+0x64/0x84) > [<c052ec58>] (__pm_runtime_suspend) from [<c04bf25c>] (omap2_gpio_prepare_for_idle+0x5c/0x70) > [<c04bf25c>] (omap2_gpio_prepare_for_idle) from [<c0125568>] (omap_sram_idle+0x140/0x244) > [<c0125568>] (omap_sram_idle) from [<c01269dc>] (omap3_enter_idle_bm+0xfc/0x1ec) > [<c01269dc>] (omap3_enter_idle_bm) from [<c0601bdc>] (cpuidle_enter_state+0x80/0x3d4) > [<c0601bdc>] (cpuidle_enter_state) from [<c0183b08>] (cpu_startup_entry+0x198/0x3a0) > [<c0183b08>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) > [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!" Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> WhACKED-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 04:03:51 +08:00
trace_rpm_idle_rcuidle(dev, rpmflags);
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
; /* Conditions are wrong. */
/* Idle notifications are allowed only in the RPM_ACTIVE state. */
else if (dev->power.runtime_status != RPM_ACTIVE)
retval = -EAGAIN;
/*
* Any pending request other than an idle notification takes
* precedence over us, except that the timer may be running.
*/
else if (dev->power.request_pending &&
dev->power.request > RPM_REQ_IDLE)
retval = -EAGAIN;
/* Act as though RPM_NOWAIT is always set. */
else if (dev->power.idle_notification)
retval = -EINPROGRESS;
if (retval)
goto out;
/* Pending requests need to be canceled. */
dev->power.request = RPM_REQ_NONE;
if (dev->power.no_callbacks)
goto out;
/* Carry out an asynchronous or a synchronous idle notification. */
if (rpmflags & RPM_ASYNC) {
dev->power.request = RPM_REQ_IDLE;
if (!dev->power.request_pending) {
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
PM / runtime: Add _rcuidle suffix to allow rpm_idle() use from idle This commit appends a few _rcuidle suffixes to fix the following RCU-used-from-idle bug: > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1116 Not tainted > ------------------------------- > include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052cc2c>] __rpm_callback+0x58/0x60 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [<c0110290>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) > [<c010c3a8>] (show_stack) from [<c047fd68>] (dump_stack+0xb0/0xe4) > [<c047fd68>] (dump_stack) from [<c052d5d0>] (rpm_suspend+0x580/0x768) > [<c052d5d0>] (rpm_suspend) from [<c052ec58>] (__pm_runtime_suspend+0x64/0x84) > [<c052ec58>] (__pm_runtime_suspend) from [<c04bf25c>] (omap2_gpio_prepare_for_idle+0x5c/0x70) > [<c04bf25c>] (omap2_gpio_prepare_for_idle) from [<c0125568>] (omap_sram_idle+0x140/0x244) > [<c0125568>] (omap_sram_idle) from [<c01269dc>] (omap3_enter_idle_bm+0xfc/0x1ec) > [<c01269dc>] (omap3_enter_idle_bm) from [<c0601bdc>] (cpuidle_enter_state+0x80/0x3d4) > [<c0601bdc>] (cpuidle_enter_state) from [<c0183b08>] (cpu_startup_entry+0x198/0x3a0) > [<c0183b08>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) > [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!" Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> WhACKED-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 04:03:51 +08:00
trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
return 0;
}
dev->power.idle_notification = true;
callback = RPM_GET_CALLBACK(dev, runtime_idle);
if (callback)
retval = __rpm_callback(callback, dev);
dev->power.idle_notification = false;
wake_up_all(&dev->power.wait_queue);
out:
PM / runtime: Add _rcuidle suffix to allow rpm_idle() use from idle This commit appends a few _rcuidle suffixes to fix the following RCU-used-from-idle bug: > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1116 Not tainted > ------------------------------- > include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052cc2c>] __rpm_callback+0x58/0x60 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [<c0110290>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) > [<c010c3a8>] (show_stack) from [<c047fd68>] (dump_stack+0xb0/0xe4) > [<c047fd68>] (dump_stack) from [<c052d5d0>] (rpm_suspend+0x580/0x768) > [<c052d5d0>] (rpm_suspend) from [<c052ec58>] (__pm_runtime_suspend+0x64/0x84) > [<c052ec58>] (__pm_runtime_suspend) from [<c04bf25c>] (omap2_gpio_prepare_for_idle+0x5c/0x70) > [<c04bf25c>] (omap2_gpio_prepare_for_idle) from [<c0125568>] (omap_sram_idle+0x140/0x244) > [<c0125568>] (omap_sram_idle) from [<c01269dc>] (omap3_enter_idle_bm+0xfc/0x1ec) > [<c01269dc>] (omap3_enter_idle_bm) from [<c0601bdc>] (cpuidle_enter_state+0x80/0x3d4) > [<c0601bdc>] (cpuidle_enter_state) from [<c0183b08>] (cpu_startup_entry+0x198/0x3a0) > [<c0183b08>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) > [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!" Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> WhACKED-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 04:03:51 +08:00
trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
}
/**
* rpm_callback - Run a given runtime PM callback for a given device.
* @cb: Runtime PM callback to run.
* @dev: Device to run the callback for.
*/
static int rpm_callback(int (*cb)(struct device *), struct device *dev)
{
int retval;
if (!cb)
return -ENOSYS;
if (dev->power.memalloc_noio) {
unsigned int noio_flag;
/*
* Deadlock might be caused if memory allocation with
* GFP_KERNEL happens inside runtime_suspend and
* runtime_resume callbacks of one block device's
* ancestor or the block device itself. Network
* device might be thought as part of iSCSI block
* device, so network device and its ancestor should
* be marked as memalloc_noio too.
*/
noio_flag = memalloc_noio_save();
retval = __rpm_callback(cb, dev);
memalloc_noio_restore(noio_flag);
} else {
retval = __rpm_callback(cb, dev);
}
dev->power.runtime_error = retval;
return retval != -EACCES ? retval : -EIO;
}
/**
* rpm_suspend - Carry out runtime suspend of given device.
* @dev: Device to suspend.
* @rpmflags: Flag bits.
*
* Check if the device's runtime PM status allows it to be suspended.
* Cancel a pending idle notification, autosuspend or suspend. If
* another suspend has been started earlier, either return immediately
* or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC
* flags. If the RPM_ASYNC flag is set then queue a suspend request;
* otherwise run the ->runtime_suspend() callback directly. When
* ->runtime_suspend succeeded, if a deferred resume was requested while
* the callback was running then carry it out, otherwise send an idle
* notification for its parent (if the suspend succeeded and both
* ignore_children of parent->power and irq_safe of dev->power are not set).
* If ->runtime_suspend failed with -EAGAIN or -EBUSY, and if the RPM_AUTO
* flag is set and the next autosuspend-delay expiration time is in the
* future, schedule another autosuspend attempt.
*
* This function must be called under dev->power.lock with interrupts disabled.
*/
static int rpm_suspend(struct device *dev, int rpmflags)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
int (*callback)(struct device *);
struct device *parent = NULL;
int retval;
PM / runtime: Use _rcuidle for runtime suspend tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified a few more unprotected uses of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The affected function is rpm_suspend(). The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ Warning from omap3 =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052ee24>] __pm_runtime_suspend+0x54/0x84 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP36xx (Flattened Device Tree) [<c0110308>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) [<c010c3a8>] (show_stack) from [<c047fec8>] (dump_stack+0xb0/0xe4) [<c047fec8>] (dump_stack) from [<c052d7b4>] (rpm_suspend+0x604/0x7e4) [<c052d7b4>] (rpm_suspend) from [<c052ee34>] (__pm_runtime_suspend+0x64/0x84) [<c052ee34>] (__pm_runtime_suspend) from [<c04bf3bc>] (omap2_gpio_prepare_for_idle+0x5c/0x70) [<c04bf3bc>] (omap2_gpio_prepare_for_idle) from [<c01255e8>] (omap_sram_idle+0x140/0x244) [<c01255e8>] (omap_sram_idle) from [<c0126b48>] (omap3_enter_idle_bm+0xfc/0x1ec) [<c0126b48>] (omap3_enter_idle_bm) from [<c0601db8>] (cpuidle_enter_state+0x80/0x3d4) [<c0601db8>] (cpuidle_enter_state) from [<c0183c74>] (cpu_startup_entry+0x198/0x3a0) [<c0183c74>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 01:42:25 +08:00
trace_rpm_suspend_rcuidle(dev, rpmflags);
repeat:
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
; /* Conditions are wrong. */
/* Synchronous suspends are not allowed in the RPM_RESUMING state. */
else if (dev->power.runtime_status == RPM_RESUMING &&
!(rpmflags & RPM_ASYNC))
retval = -EAGAIN;
if (retval)
goto out;
/* If the autosuspend_delay time hasn't expired yet, reschedule. */
if ((rpmflags & RPM_AUTO)
&& dev->power.runtime_status != RPM_SUSPENDING) {
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
u64 expires = pm_runtime_autosuspend_expiration(dev);
if (expires != 0) {
/* Pending requests need to be canceled. */
dev->power.request = RPM_REQ_NONE;
/*
* Optimization: If the timer is already running and is
* set to expire at or before the autosuspend delay,
* avoid the overhead of resetting it. Just let it
* expire; pm_suspend_timer_fn() will take care of the
* rest.
*/
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
if (!(dev->power.timer_expires &&
dev->power.timer_expires <= expires)) {
/*
* We add a slack of 25% to gather wakeups
* without sacrificing the granularity.
*/
u64 slack = (u64)READ_ONCE(dev->power.autosuspend_delay) *
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
(NSEC_PER_MSEC >> 2);
dev->power.timer_expires = expires;
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
hrtimer_start_range_ns(&dev->power.suspend_timer,
ns_to_ktime(expires),
slack,
HRTIMER_MODE_ABS);
}
dev->power.timer_autosuspends = 1;
goto out;
}
}
/* Other scheduled or pending requests need to be canceled. */
pm_runtime_cancel_pending(dev);
if (dev->power.runtime_status == RPM_SUSPENDING) {
DEFINE_WAIT(wait);
if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
retval = -EINPROGRESS;
goto out;
}
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
cpu_relax();
spin_lock(&dev->power.lock);
goto repeat;
}
/* Wait for the other suspend running in parallel with us. */
for (;;) {
prepare_to_wait(&dev->power.wait_queue, &wait,
TASK_UNINTERRUPTIBLE);
if (dev->power.runtime_status != RPM_SUSPENDING)
break;
spin_unlock_irq(&dev->power.lock);
schedule();
spin_lock_irq(&dev->power.lock);
}
finish_wait(&dev->power.wait_queue, &wait);
goto repeat;
}
if (dev->power.no_callbacks)
goto no_callback; /* Assume success. */
/* Carry out an asynchronous or a synchronous suspend. */
if (rpmflags & RPM_ASYNC) {
dev->power.request = (rpmflags & RPM_AUTO) ?
RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND;
if (!dev->power.request_pending) {
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
goto out;
}
__update_runtime_status(dev, RPM_SUSPENDING);
callback = RPM_GET_CALLBACK(dev, runtime_suspend);
PM / wakeirq: Fix dedicated wakeirq for drivers not using autosuspend I noticed some wakeirq flakeyness with consumer drivers not using autosuspend. For drivers not using autosuspend, the wakeirq may never get unmasked in rpm_suspend() because of irq desc->depth. We are configuring dedicated wakeirqs to start with IRQ_NOAUTOEN as we naturally don't want them running until rpm_suspend() is called. However, when a consumer driver initially calls pm_runtime_get(), we now wrongly start with disable_irq_nosync() call on the dedicated wakeirq that is disabled to start with. This causes desc->depth to toggle between 1 and 2 instead of the usual 0 and 1. This can prevent enable_irq() from unmasking the wakeirq as that only happens at desc->depth 1. This does not necessarily show up with drivers using autosuspend as there is time for disable_irq_nosync() before rpm_suspend() gets called after the autosuspend timeout. Let's fix the issue by adding wirq->status that lazily gets set on the first rpm_suspend(). We also need PM runtime core private functions for dev_pm_enable_wake_irq_check() and dev_pm_disable_wake_irq_check() so we can enable the dedicated wakeirq on the first rpm_suspend(). While at it, let's also fix the comments for dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq(). Those can still be used by the consumer drivers as needed because the IRQ core manages the interrupt usecount for us. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Signed-off-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-12-06 08:38:16 +08:00
dev_pm_enable_wake_irq_check(dev, true);
retval = rpm_callback(callback, dev);
PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
2011-12-01 07:01:31 +08:00
if (retval)
goto fail;
no_callback:
__update_runtime_status(dev, RPM_SUSPENDED);
pm_runtime_deactivate_timer(dev);
if (dev->parent) {
parent = dev->parent;
atomic_add_unless(&parent->power.child_count, -1, 0);
}
wake_up_all(&dev->power.wait_queue);
if (dev->power.deferred_resume) {
dev->power.deferred_resume = false;
rpm_resume(dev, 0);
retval = -EAGAIN;
goto out;
}
/* Maybe the parent is now able to suspend. */
if (parent && !parent->power.ignore_children && !dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
spin_lock(&parent->power.lock);
rpm_idle(parent, RPM_ASYNC);
spin_unlock(&parent->power.lock);
spin_lock(&dev->power.lock);
}
out:
PM / runtime: Use _rcuidle for runtime suspend tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified a few more unprotected uses of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The affected function is rpm_suspend(). The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ Warning from omap3 =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052ee24>] __pm_runtime_suspend+0x54/0x84 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP36xx (Flattened Device Tree) [<c0110308>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) [<c010c3a8>] (show_stack) from [<c047fec8>] (dump_stack+0xb0/0xe4) [<c047fec8>] (dump_stack) from [<c052d7b4>] (rpm_suspend+0x604/0x7e4) [<c052d7b4>] (rpm_suspend) from [<c052ee34>] (__pm_runtime_suspend+0x64/0x84) [<c052ee34>] (__pm_runtime_suspend) from [<c04bf3bc>] (omap2_gpio_prepare_for_idle+0x5c/0x70) [<c04bf3bc>] (omap2_gpio_prepare_for_idle) from [<c01255e8>] (omap_sram_idle+0x140/0x244) [<c01255e8>] (omap_sram_idle) from [<c0126b48>] (omap3_enter_idle_bm+0xfc/0x1ec) [<c0126b48>] (omap3_enter_idle_bm) from [<c0601db8>] (cpuidle_enter_state+0x80/0x3d4) [<c0601db8>] (cpuidle_enter_state) from [<c0183c74>] (cpu_startup_entry+0x198/0x3a0) [<c0183c74>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 01:42:25 +08:00
trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
return retval;
PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
2011-12-01 07:01:31 +08:00
fail:
PM / wakeirq: Fix dedicated wakeirq for drivers not using autosuspend I noticed some wakeirq flakeyness with consumer drivers not using autosuspend. For drivers not using autosuspend, the wakeirq may never get unmasked in rpm_suspend() because of irq desc->depth. We are configuring dedicated wakeirqs to start with IRQ_NOAUTOEN as we naturally don't want them running until rpm_suspend() is called. However, when a consumer driver initially calls pm_runtime_get(), we now wrongly start with disable_irq_nosync() call on the dedicated wakeirq that is disabled to start with. This causes desc->depth to toggle between 1 and 2 instead of the usual 0 and 1. This can prevent enable_irq() from unmasking the wakeirq as that only happens at desc->depth 1. This does not necessarily show up with drivers using autosuspend as there is time for disable_irq_nosync() before rpm_suspend() gets called after the autosuspend timeout. Let's fix the issue by adding wirq->status that lazily gets set on the first rpm_suspend(). We also need PM runtime core private functions for dev_pm_enable_wake_irq_check() and dev_pm_disable_wake_irq_check() so we can enable the dedicated wakeirq on the first rpm_suspend(). While at it, let's also fix the comments for dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq(). Those can still be used by the consumer drivers as needed because the IRQ core manages the interrupt usecount for us. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Signed-off-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-12-06 08:38:16 +08:00
dev_pm_disable_wake_irq_check(dev);
PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
2011-12-01 07:01:31 +08:00
__update_runtime_status(dev, RPM_ACTIVE);
dev->power.deferred_resume = false;
wake_up_all(&dev->power.wait_queue);
PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
2011-12-01 07:01:31 +08:00
if (retval == -EAGAIN || retval == -EBUSY) {
dev->power.runtime_error = 0;
/*
* If the callback routine failed an autosuspend, and
* if the last_busy time has been updated so that there
* is a new autosuspend expiration time, automatically
* reschedule another autosuspend.
*/
if ((rpmflags & RPM_AUTO) &&
pm_runtime_autosuspend_expiration(dev) != 0)
goto repeat;
} else {
pm_runtime_cancel_pending(dev);
}
goto out;
}
/**
* rpm_resume - Carry out runtime resume of given device.
* @dev: Device to resume.
* @rpmflags: Flag bits.
*
* Check if the device's runtime PM status allows it to be resumed. Cancel
* any scheduled or pending requests. If another resume has been started
* earlier, either return immediately or wait for it to finish, depending on the
* RPM_NOWAIT and RPM_ASYNC flags. Similarly, if there's a suspend running in
* parallel with this function, either tell the other process to resume after
* suspending (deferred_resume) or wait for it to finish. If the RPM_ASYNC
* flag is set then queue a resume request; otherwise run the
* ->runtime_resume() callback directly. Queue an idle notification for the
* device if the resume succeeded.
*
* This function must be called under dev->power.lock with interrupts disabled.
*/
static int rpm_resume(struct device *dev, int rpmflags)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
int (*callback)(struct device *);
struct device *parent = NULL;
int retval = 0;
PM / runtime: Add _rcuidle suffix to allow rpm_resume() to be called from idle This commit applies another _rcuidle suffix to fix an RCU use from idle. > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1122 Not tainted > ------------------------------- > include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052e3dc>] __pm_runtime_resume+0x3c/0x64 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [<c0110290>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) > [<c010c3a8>] (show_stack) from [<c047fd68>] (dump_stack+0xb0/0xe4) > [<c047fd68>] (dump_stack) from [<c052e178>] (rpm_resume+0x5cc/0x7f4) > [<c052e178>] (rpm_resume) from [<c052e3ec>] (__pm_runtime_resume+0x4c/0x64) > [<c052e3ec>] (__pm_runtime_resume) from [<c04bf2c4>] (omap2_gpio_resume_after_idle+0x54/0x68) > [<c04bf2c4>] (omap2_gpio_resume_after_idle) from [<c01269dc>] (omap3_enter_idle_bm+0xfc/0x1ec) > [<c01269dc>] (omap3_enter_idle_bm) from [<c060198c>] (cpuidle_enter_state+0x80/0x3d4) > [<c060198c>] (cpuidle_enter_state) from [<c0183b08>] (cpu_startup_entry+0x198/0x3a0) > [<c0183b08>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) > [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 04:38:55 +08:00
trace_rpm_resume_rcuidle(dev, rpmflags);
repeat:
if (dev->power.runtime_error)
retval = -EINVAL;
PM / Runtime: let rpm_resume() succeed if RPM_ACTIVE, even when disabled, v2 There are several drivers where the return value of pm_runtime_get_sync() is used to decide whether or not it is safe to access hardware and that don't provide .suspend() callbacks for system suspend (but may use late/noirq callbacks.) If such a driver happens to call pm_runtime_get_sync() during system suspend, after the core has disabled runtime PM, it will get the error code and will decide that the hardware should not be accessed, although this may be a wrong conclusion, depending on the state of the device when runtime PM was disabled. Drivers might work around this problem by using a test like: ret = pm_runtime_get_sync(dev); if (!ret || (ret == -EACCES && driver_private_data(dev)->suspended)) { /* access hardware */ } where driver_private_data(dev)->suspended is a flag set by the driver's .suspend() method (that would have to be added for this purpose). However, that potentially would need to be done by multiple drivers which means quite a lot of duplicated code and bloat. To avoid that we can use the observation that the core sets dev->power.is_suspended before disabling runtime PM and use that instead of the driver's private flag. Still, potentially many drivers would need to repeat that same check in quite a few places, so it's better to let the core do it. Then we can be a bit smarter and check whether or not runtime PM was disabled by the core only (disable_depth == 1) or by someone else in addition to the core (disable_depth > 1). In the former case rpm_resume() can return 1 if the runtime PM status is RPM_ACTIVE, because it means the device was active when the core disabled runtime PM. In the latter case it should still return -EACCES, because it isn't clear why runtime PM has been disabled. Tested on AM3730/Beagle-xM where a wakeup IRQ firing during the late suspend phase triggers runtime PM activity in the I2C driver since the wakeup IRQ is on an I2C-connected PMIC. [rjw: Modified whitespace to follow the file's convention.] Signed-off-by: Kevin Hilman <khilman@ti.com> Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
2012-09-22 06:47:34 +08:00
else if (dev->power.disable_depth == 1 && dev->power.is_suspended
&& dev->power.runtime_status == RPM_ACTIVE)
retval = 1;
else if (dev->power.disable_depth > 0)
retval = -EACCES;
if (retval)
goto out;
/*
* Other scheduled or pending requests need to be canceled. Small
* optimization: If an autosuspend timer is running, leave it running
* rather than cancelling it now only to restart it again in the near
* future.
*/
dev->power.request = RPM_REQ_NONE;
if (!dev->power.timer_autosuspends)
pm_runtime_deactivate_timer(dev);
if (dev->power.runtime_status == RPM_ACTIVE) {
retval = 1;
goto out;
}
if (dev->power.runtime_status == RPM_RESUMING
|| dev->power.runtime_status == RPM_SUSPENDING) {
DEFINE_WAIT(wait);
if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
if (dev->power.runtime_status == RPM_SUSPENDING)
dev->power.deferred_resume = true;
else
retval = -EINPROGRESS;
goto out;
}
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
cpu_relax();
spin_lock(&dev->power.lock);
goto repeat;
}
/* Wait for the operation carried out in parallel with us. */
for (;;) {
prepare_to_wait(&dev->power.wait_queue, &wait,
TASK_UNINTERRUPTIBLE);
if (dev->power.runtime_status != RPM_RESUMING
&& dev->power.runtime_status != RPM_SUSPENDING)
break;
spin_unlock_irq(&dev->power.lock);
schedule();
spin_lock_irq(&dev->power.lock);
}
finish_wait(&dev->power.wait_queue, &wait);
goto repeat;
}
/*
* See if we can skip waking up the parent. This is safe only if
* power.no_callbacks is set, because otherwise we don't know whether
* the resume will actually succeed.
*/
if (dev->power.no_callbacks && !parent && dev->parent) {
spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING);
if (dev->parent->power.disable_depth > 0
|| dev->parent->power.ignore_children
|| dev->parent->power.runtime_status == RPM_ACTIVE) {
atomic_inc(&dev->parent->power.child_count);
spin_unlock(&dev->parent->power.lock);
retval = 1;
goto no_callback; /* Assume success. */
}
spin_unlock(&dev->parent->power.lock);
}
/* Carry out an asynchronous or a synchronous resume. */
if (rpmflags & RPM_ASYNC) {
dev->power.request = RPM_REQ_RESUME;
if (!dev->power.request_pending) {
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
retval = 0;
goto out;
}
if (!parent && dev->parent) {
/*
* Increment the parent's usage counter and resume it if
* necessary. Not needed if dev is irq-safe; then the
* parent is permanently resumed.
*/
parent = dev->parent;
if (dev->power.irq_safe)
goto skip_parent;
spin_unlock(&dev->power.lock);
pm_runtime_get_noresume(parent);
spin_lock(&parent->power.lock);
/*
* Resume the parent if it has runtime PM enabled and not been
* set to ignore its children.
*/
if (!parent->power.disable_depth
&& !parent->power.ignore_children) {
rpm_resume(parent, 0);
if (parent->power.runtime_status != RPM_ACTIVE)
retval = -EBUSY;
}
spin_unlock(&parent->power.lock);
spin_lock(&dev->power.lock);
if (retval)
goto out;
goto repeat;
}
skip_parent:
if (dev->power.no_callbacks)
goto no_callback; /* Assume success. */
__update_runtime_status(dev, RPM_RESUMING);
callback = RPM_GET_CALLBACK(dev, runtime_resume);
PM / wakeirq: Fix dedicated wakeirq for drivers not using autosuspend I noticed some wakeirq flakeyness with consumer drivers not using autosuspend. For drivers not using autosuspend, the wakeirq may never get unmasked in rpm_suspend() because of irq desc->depth. We are configuring dedicated wakeirqs to start with IRQ_NOAUTOEN as we naturally don't want them running until rpm_suspend() is called. However, when a consumer driver initially calls pm_runtime_get(), we now wrongly start with disable_irq_nosync() call on the dedicated wakeirq that is disabled to start with. This causes desc->depth to toggle between 1 and 2 instead of the usual 0 and 1. This can prevent enable_irq() from unmasking the wakeirq as that only happens at desc->depth 1. This does not necessarily show up with drivers using autosuspend as there is time for disable_irq_nosync() before rpm_suspend() gets called after the autosuspend timeout. Let's fix the issue by adding wirq->status that lazily gets set on the first rpm_suspend(). We also need PM runtime core private functions for dev_pm_enable_wake_irq_check() and dev_pm_disable_wake_irq_check() so we can enable the dedicated wakeirq on the first rpm_suspend(). While at it, let's also fix the comments for dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq(). Those can still be used by the consumer drivers as needed because the IRQ core manages the interrupt usecount for us. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Signed-off-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-12-06 08:38:16 +08:00
dev_pm_disable_wake_irq_check(dev);
retval = rpm_callback(callback, dev);
if (retval) {
__update_runtime_status(dev, RPM_SUSPENDED);
pm_runtime_cancel_pending(dev);
PM / wakeirq: Fix dedicated wakeirq for drivers not using autosuspend I noticed some wakeirq flakeyness with consumer drivers not using autosuspend. For drivers not using autosuspend, the wakeirq may never get unmasked in rpm_suspend() because of irq desc->depth. We are configuring dedicated wakeirqs to start with IRQ_NOAUTOEN as we naturally don't want them running until rpm_suspend() is called. However, when a consumer driver initially calls pm_runtime_get(), we now wrongly start with disable_irq_nosync() call on the dedicated wakeirq that is disabled to start with. This causes desc->depth to toggle between 1 and 2 instead of the usual 0 and 1. This can prevent enable_irq() from unmasking the wakeirq as that only happens at desc->depth 1. This does not necessarily show up with drivers using autosuspend as there is time for disable_irq_nosync() before rpm_suspend() gets called after the autosuspend timeout. Let's fix the issue by adding wirq->status that lazily gets set on the first rpm_suspend(). We also need PM runtime core private functions for dev_pm_enable_wake_irq_check() and dev_pm_disable_wake_irq_check() so we can enable the dedicated wakeirq on the first rpm_suspend(). While at it, let's also fix the comments for dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq(). Those can still be used by the consumer drivers as needed because the IRQ core manages the interrupt usecount for us. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Signed-off-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-12-06 08:38:16 +08:00
dev_pm_enable_wake_irq_check(dev, false);
} else {
no_callback:
__update_runtime_status(dev, RPM_ACTIVE);
pm_runtime_mark_last_busy(dev);
if (parent)
atomic_inc(&parent->power.child_count);
}
wake_up_all(&dev->power.wait_queue);
if (retval >= 0)
rpm_idle(dev, RPM_ASYNC);
out:
if (parent && !dev->power.irq_safe) {
spin_unlock_irq(&dev->power.lock);
pm_runtime_put(parent);
spin_lock_irq(&dev->power.lock);
}
PM / runtime: Add _rcuidle suffix to allow rpm_resume() to be called from idle This commit applies another _rcuidle suffix to fix an RCU use from idle. > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1122 Not tainted > ------------------------------- > include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [<c052e3dc>] __pm_runtime_resume+0x3c/0x64 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [<c0110290>] (unwind_backtrace) from [<c010c3a8>] (show_stack+0x10/0x14) > [<c010c3a8>] (show_stack) from [<c047fd68>] (dump_stack+0xb0/0xe4) > [<c047fd68>] (dump_stack) from [<c052e178>] (rpm_resume+0x5cc/0x7f4) > [<c052e178>] (rpm_resume) from [<c052e3ec>] (__pm_runtime_resume+0x4c/0x64) > [<c052e3ec>] (__pm_runtime_resume) from [<c04bf2c4>] (omap2_gpio_resume_after_idle+0x54/0x68) > [<c04bf2c4>] (omap2_gpio_resume_after_idle) from [<c01269dc>] (omap3_enter_idle_bm+0xfc/0x1ec) > [<c01269dc>] (omap3_enter_idle_bm) from [<c060198c>] (cpuidle_enter_state+0x80/0x3d4) > [<c060198c>] (cpuidle_enter_state) from [<c0183b08>] (cpu_startup_entry+0x198/0x3a0) > [<c0183b08>] (cpu_startup_entry) from [<c0b00c0c>] (start_kernel+0x354/0x3c8) > [<c0b00c0c>] (start_kernel) from [<8000807c>] (0x8000807c) Reported-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Tony Lindgren <tony@atomide.com> Tested-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-04-27 04:38:55 +08:00
trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
return retval;
}
/**
* pm_runtime_work - Universal runtime PM work function.
* @work: Work structure used for scheduling the execution of this function.
*
* Use @work to get the device object the work is to be done for, determine what
* is to be done and execute the appropriate runtime PM function.
*/
static void pm_runtime_work(struct work_struct *work)
{
struct device *dev = container_of(work, struct device, power.work);
enum rpm_request req;
spin_lock_irq(&dev->power.lock);
if (!dev->power.request_pending)
goto out;
req = dev->power.request;
dev->power.request = RPM_REQ_NONE;
dev->power.request_pending = false;
switch (req) {
case RPM_REQ_NONE:
break;
case RPM_REQ_IDLE:
rpm_idle(dev, RPM_NOWAIT);
break;
case RPM_REQ_SUSPEND:
rpm_suspend(dev, RPM_NOWAIT);
break;
case RPM_REQ_AUTOSUSPEND:
rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO);
break;
case RPM_REQ_RESUME:
rpm_resume(dev, RPM_NOWAIT);
break;
}
out:
spin_unlock_irq(&dev->power.lock);
}
/**
* pm_suspend_timer_fn - Timer function for pm_schedule_suspend().
* @data: Device pointer passed by pm_schedule_suspend().
*
* Check if the time is right and queue a suspend request.
*/
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer)
{
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
struct device *dev = container_of(timer, struct device, power.suspend_timer);
unsigned long flags;
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
u64 expires;
spin_lock_irqsave(&dev->power.lock, flags);
expires = dev->power.timer_expires;
/*
* If 'expires' is after the current time, we've been called
* too early.
*/
if (expires > 0 && expires < ktime_get_mono_fast_ns()) {
dev->power.timer_expires = 0;
rpm_suspend(dev, dev->power.timer_autosuspends ?
(RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
}
spin_unlock_irqrestore(&dev->power.lock, flags);
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
return HRTIMER_NORESTART;
}
/**
* pm_schedule_suspend - Set up a timer to submit a suspend request in future.
* @dev: Device to suspend.
* @delay: Time to wait before submitting a suspend request, in milliseconds.
*/
int pm_schedule_suspend(struct device *dev, unsigned int delay)
{
unsigned long flags;
u64 expires;
int retval;
spin_lock_irqsave(&dev->power.lock, flags);
if (!delay) {
retval = rpm_suspend(dev, RPM_ASYNC);
goto out;
}
retval = rpm_check_suspend_allowed(dev);
if (retval)
goto out;
/* Other scheduled or pending requests need to be canceled. */
pm_runtime_cancel_pending(dev);
expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC;
dev->power.timer_expires = expires;
dev->power.timer_autosuspends = 0;
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS);
out:
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
}
EXPORT_SYMBOL_GPL(pm_schedule_suspend);
/**
* __pm_runtime_idle - Entry point for runtime idle operations.
* @dev: Device to send idle notification for.
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
* return immediately if it is larger than zero. Then carry out an idle
* notification, either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
* or if pm_runtime_irq_safe() has been called.
*/
int __pm_runtime_idle(struct device *dev, int rpmflags)
{
unsigned long flags;
int retval;
if (rpmflags & RPM_GET_PUT) {
if (!atomic_dec_and_test(&dev->power.usage_count)) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
}
PM / runtime: Avoid false-positive warnings from might_sleep_if() The might_sleep_if() assertions in __pm_runtime_idle(), __pm_runtime_suspend() and __pm_runtime_resume() may generate false-positive warnings in some situations. For example, that happens if a nested pm_runtime_get_sync()/pm_runtime_put() pair is executed with disabled interrupts within an outer pm_runtime_get_sync()/pm_runtime_put() section for the same device. [Generally, pm_runtime_get_sync() may sleep, so it should not be called with disabled interrupts, but in this particular case the previous pm_runtime_get_sync() guarantees that the device will not be suspended, so the inner pm_runtime_get_sync() will return immediately after incrementing the device's usage counter.] That started to happen in the i915 driver in 4.10-rc, leading to the following splat: BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1032 in_atomic(): 1, irqs_disabled(): 0, pid: 1500, name: Xorg 1 lock held by Xorg/1500: #0: (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0680c13>] i915_mutex_lock_interruptible+0x43/0x140 [i915] CPU: 0 PID: 1500 Comm: Xorg Not tainted Call Trace: dump_stack+0x85/0xc2 ___might_sleep+0x196/0x260 __might_sleep+0x53/0xb0 __pm_runtime_resume+0x7a/0x90 intel_runtime_pm_get+0x25/0x90 [i915] aliasing_gtt_bind_vma+0xaa/0xf0 [i915] i915_vma_bind+0xaf/0x1e0 [i915] i915_gem_execbuffer_relocate_entry+0x513/0x6f0 [i915] i915_gem_execbuffer_relocate_vma.isra.34+0x188/0x250 [i915] ? trace_hardirqs_on+0xd/0x10 ? i915_gem_execbuffer_reserve_vma.isra.31+0x152/0x1f0 [i915] ? i915_gem_execbuffer_reserve.isra.32+0x372/0x3a0 [i915] i915_gem_do_execbuffer.isra.38+0xa70/0x1a40 [i915] ? __might_fault+0x4e/0xb0 i915_gem_execbuffer2+0xc5/0x260 [i915] ? __might_fault+0x4e/0xb0 drm_ioctl+0x206/0x450 [drm] ? i915_gem_execbuffer+0x340/0x340 [i915] ? __fget+0x5/0x200 do_vfs_ioctl+0x91/0x6f0 ? __fget+0x111/0x200 ? __fget+0x5/0x200 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc6 even though the code triggering it is correct. Unfortunately, the might_sleep_if() assertions in question are too coarse-grained to cover such cases correctly, so make them a bit less sensitive in order to avoid the false-positives. Reported-and-tested-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2017-02-04 07:44:36 +08:00
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
spin_lock_irqsave(&dev->power.lock, flags);
retval = rpm_idle(dev, rpmflags);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
}
EXPORT_SYMBOL_GPL(__pm_runtime_idle);
/**
* __pm_runtime_suspend - Entry point for runtime put/suspend operations.
* @dev: Device to suspend.
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
* return immediately if it is larger than zero. Then carry out a suspend,
* either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
* or if pm_runtime_irq_safe() has been called.
*/
int __pm_runtime_suspend(struct device *dev, int rpmflags)
{
unsigned long flags;
int retval;
if (rpmflags & RPM_GET_PUT) {
if (!atomic_dec_and_test(&dev->power.usage_count)) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
}
PM / runtime: Avoid false-positive warnings from might_sleep_if() The might_sleep_if() assertions in __pm_runtime_idle(), __pm_runtime_suspend() and __pm_runtime_resume() may generate false-positive warnings in some situations. For example, that happens if a nested pm_runtime_get_sync()/pm_runtime_put() pair is executed with disabled interrupts within an outer pm_runtime_get_sync()/pm_runtime_put() section for the same device. [Generally, pm_runtime_get_sync() may sleep, so it should not be called with disabled interrupts, but in this particular case the previous pm_runtime_get_sync() guarantees that the device will not be suspended, so the inner pm_runtime_get_sync() will return immediately after incrementing the device's usage counter.] That started to happen in the i915 driver in 4.10-rc, leading to the following splat: BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1032 in_atomic(): 1, irqs_disabled(): 0, pid: 1500, name: Xorg 1 lock held by Xorg/1500: #0: (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0680c13>] i915_mutex_lock_interruptible+0x43/0x140 [i915] CPU: 0 PID: 1500 Comm: Xorg Not tainted Call Trace: dump_stack+0x85/0xc2 ___might_sleep+0x196/0x260 __might_sleep+0x53/0xb0 __pm_runtime_resume+0x7a/0x90 intel_runtime_pm_get+0x25/0x90 [i915] aliasing_gtt_bind_vma+0xaa/0xf0 [i915] i915_vma_bind+0xaf/0x1e0 [i915] i915_gem_execbuffer_relocate_entry+0x513/0x6f0 [i915] i915_gem_execbuffer_relocate_vma.isra.34+0x188/0x250 [i915] ? trace_hardirqs_on+0xd/0x10 ? i915_gem_execbuffer_reserve_vma.isra.31+0x152/0x1f0 [i915] ? i915_gem_execbuffer_reserve.isra.32+0x372/0x3a0 [i915] i915_gem_do_execbuffer.isra.38+0xa70/0x1a40 [i915] ? __might_fault+0x4e/0xb0 i915_gem_execbuffer2+0xc5/0x260 [i915] ? __might_fault+0x4e/0xb0 drm_ioctl+0x206/0x450 [drm] ? i915_gem_execbuffer+0x340/0x340 [i915] ? __fget+0x5/0x200 do_vfs_ioctl+0x91/0x6f0 ? __fget+0x111/0x200 ? __fget+0x5/0x200 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc6 even though the code triggering it is correct. Unfortunately, the might_sleep_if() assertions in question are too coarse-grained to cover such cases correctly, so make them a bit less sensitive in order to avoid the false-positives. Reported-and-tested-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2017-02-04 07:44:36 +08:00
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
spin_lock_irqsave(&dev->power.lock, flags);
retval = rpm_suspend(dev, rpmflags);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
}
EXPORT_SYMBOL_GPL(__pm_runtime_suspend);
/**
* __pm_runtime_resume - Entry point for runtime resume operations.
* @dev: Device to resume.
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, increment the device's usage count. Then
* carry out a resume, either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
* or if pm_runtime_irq_safe() has been called.
*/
int __pm_runtime_resume(struct device *dev, int rpmflags)
{
unsigned long flags;
int retval;
PM / runtime: Avoid false-positive warnings from might_sleep_if() The might_sleep_if() assertions in __pm_runtime_idle(), __pm_runtime_suspend() and __pm_runtime_resume() may generate false-positive warnings in some situations. For example, that happens if a nested pm_runtime_get_sync()/pm_runtime_put() pair is executed with disabled interrupts within an outer pm_runtime_get_sync()/pm_runtime_put() section for the same device. [Generally, pm_runtime_get_sync() may sleep, so it should not be called with disabled interrupts, but in this particular case the previous pm_runtime_get_sync() guarantees that the device will not be suspended, so the inner pm_runtime_get_sync() will return immediately after incrementing the device's usage counter.] That started to happen in the i915 driver in 4.10-rc, leading to the following splat: BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1032 in_atomic(): 1, irqs_disabled(): 0, pid: 1500, name: Xorg 1 lock held by Xorg/1500: #0: (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0680c13>] i915_mutex_lock_interruptible+0x43/0x140 [i915] CPU: 0 PID: 1500 Comm: Xorg Not tainted Call Trace: dump_stack+0x85/0xc2 ___might_sleep+0x196/0x260 __might_sleep+0x53/0xb0 __pm_runtime_resume+0x7a/0x90 intel_runtime_pm_get+0x25/0x90 [i915] aliasing_gtt_bind_vma+0xaa/0xf0 [i915] i915_vma_bind+0xaf/0x1e0 [i915] i915_gem_execbuffer_relocate_entry+0x513/0x6f0 [i915] i915_gem_execbuffer_relocate_vma.isra.34+0x188/0x250 [i915] ? trace_hardirqs_on+0xd/0x10 ? i915_gem_execbuffer_reserve_vma.isra.31+0x152/0x1f0 [i915] ? i915_gem_execbuffer_reserve.isra.32+0x372/0x3a0 [i915] i915_gem_do_execbuffer.isra.38+0xa70/0x1a40 [i915] ? __might_fault+0x4e/0xb0 i915_gem_execbuffer2+0xc5/0x260 [i915] ? __might_fault+0x4e/0xb0 drm_ioctl+0x206/0x450 [drm] ? i915_gem_execbuffer+0x340/0x340 [i915] ? __fget+0x5/0x200 do_vfs_ioctl+0x91/0x6f0 ? __fget+0x111/0x200 ? __fget+0x5/0x200 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x23/0xc6 even though the code triggering it is correct. Unfortunately, the might_sleep_if() assertions in question are too coarse-grained to cover such cases correctly, so make them a bit less sensitive in order to avoid the false-positives. Reported-and-tested-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2017-02-04 07:44:36 +08:00
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
dev->power.runtime_status != RPM_ACTIVE);
if (rpmflags & RPM_GET_PUT)
atomic_inc(&dev->power.usage_count);
spin_lock_irqsave(&dev->power.lock, flags);
retval = rpm_resume(dev, rpmflags);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
}
EXPORT_SYMBOL_GPL(__pm_runtime_resume);
/**
* pm_runtime_get_if_in_use - Conditionally bump up the device's usage counter.
* @dev: Device to handle.
*
* Return -EINVAL if runtime PM is disabled for the device.
*
* If that's not the case and if the device's runtime PM status is RPM_ACTIVE
* and the runtime PM usage counter is nonzero, increment the counter and
* return 1. Otherwise return 0 without changing the counter.
*/
int pm_runtime_get_if_in_use(struct device *dev)
{
unsigned long flags;
int retval;
spin_lock_irqsave(&dev->power.lock, flags);
retval = dev->power.disable_depth > 0 ? -EINVAL :
dev->power.runtime_status == RPM_ACTIVE
&& atomic_inc_not_zero(&dev->power.usage_count);
trace_rpm_usage_rcuidle(dev, 0);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
}
EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use);
/**
* __pm_runtime_set_status - Set runtime PM status of a device.
* @dev: Device to handle.
* @status: New runtime PM status of the device.
*
* If runtime PM of the device is disabled or its power.runtime_error field is
* different from zero, the status may be changed either to RPM_ACTIVE, or to
* RPM_SUSPENDED, as long as that reflects the actual state of the device.
* However, if the device has a parent and the parent is not active, and the
* parent's power.ignore_children flag is unset, the device's status cannot be
* set to RPM_ACTIVE, so -EBUSY is returned in that case.
*
* If successful, __pm_runtime_set_status() clears the power.runtime_error field
* and the device parent's counter of unsuspended children is modified to
* reflect the new status. If the new status is RPM_SUSPENDED, an idle
* notification request for the parent is submitted.
*
* If @dev has any suppliers (as reflected by device links to them), and @status
* is RPM_ACTIVE, they will be activated upfront and if the activation of one
* of them fails, the status of @dev will be changed to RPM_SUSPENDED (instead
* of the @status value) and the suppliers will be deacticated on exit. The
* error returned by the failing supplier activation will be returned in that
* case.
*/
int __pm_runtime_set_status(struct device *dev, unsigned int status)
{
struct device *parent = dev->parent;
bool notify_parent = false;
int error = 0;
if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
return -EINVAL;
spin_lock_irq(&dev->power.lock);
/*
* Prevent PM-runtime from being enabled for the device or return an
* error if it is enabled already and working.
*/
if (dev->power.runtime_error || dev->power.disable_depth)
dev->power.disable_depth++;
else
error = -EAGAIN;
spin_unlock_irq(&dev->power.lock);
if (error)
return error;
/*
* If the new status is RPM_ACTIVE, the suppliers can be activated
* upfront regardless of the current status, because next time
* rpm_put_suppliers() runs, the rpm_active refcounts of the links
* involved will be dropped down to one anyway.
*/
if (status == RPM_ACTIVE) {
int idx = device_links_read_lock();
error = rpm_get_suppliers(dev);
if (error)
status = RPM_SUSPENDED;
device_links_read_unlock(idx);
}
spin_lock_irq(&dev->power.lock);
if (dev->power.runtime_status == status || !parent)
goto out_set;
if (status == RPM_SUSPENDED) {
atomic_add_unless(&parent->power.child_count, -1, 0);
notify_parent = !parent->power.ignore_children;
} else {
spin_lock_nested(&parent->power.lock, SINGLE_DEPTH_NESTING);
/*
* It is invalid to put an active child under a parent that is
* not active, has runtime PM enabled and the
* 'power.ignore_children' flag unset.
*/
if (!parent->power.disable_depth
&& !parent->power.ignore_children
&& parent->power.runtime_status != RPM_ACTIVE) {
dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n",
dev_name(dev),
dev_name(parent));
error = -EBUSY;
} else if (dev->power.runtime_status == RPM_SUSPENDED) {
atomic_inc(&parent->power.child_count);
}
spin_unlock(&parent->power.lock);
if (error) {
status = RPM_SUSPENDED;
goto out;
}
}
out_set:
__update_runtime_status(dev, status);
if (!error)
dev->power.runtime_error = 0;
out:
spin_unlock_irq(&dev->power.lock);
if (notify_parent)
pm_request_idle(parent);
if (status == RPM_SUSPENDED) {
int idx = device_links_read_lock();
rpm_put_suppliers(dev);
device_links_read_unlock(idx);
}
pm_runtime_enable(dev);
return error;
}
EXPORT_SYMBOL_GPL(__pm_runtime_set_status);
/**
* __pm_runtime_barrier - Cancel pending requests and wait for completions.
* @dev: Device to handle.
*
* Flush all pending requests for the device from pm_wq and wait for all
* runtime PM operations involving the device in progress to complete.
*
* Should be called under dev->power.lock with interrupts disabled.
*/
static void __pm_runtime_barrier(struct device *dev)
{
pm_runtime_deactivate_timer(dev);
if (dev->power.request_pending) {
dev->power.request = RPM_REQ_NONE;
spin_unlock_irq(&dev->power.lock);
cancel_work_sync(&dev->power.work);
spin_lock_irq(&dev->power.lock);
dev->power.request_pending = false;
}
if (dev->power.runtime_status == RPM_SUSPENDING
|| dev->power.runtime_status == RPM_RESUMING
|| dev->power.idle_notification) {
DEFINE_WAIT(wait);
/* Suspend, wake-up or idle notification in progress. */
for (;;) {
prepare_to_wait(&dev->power.wait_queue, &wait,
TASK_UNINTERRUPTIBLE);
if (dev->power.runtime_status != RPM_SUSPENDING
&& dev->power.runtime_status != RPM_RESUMING
&& !dev->power.idle_notification)
break;
spin_unlock_irq(&dev->power.lock);
schedule();
spin_lock_irq(&dev->power.lock);
}
finish_wait(&dev->power.wait_queue, &wait);
}
}
/**
* pm_runtime_barrier - Flush pending requests and wait for completions.
* @dev: Device to handle.
*
* Prevent the device from being suspended by incrementing its usage counter and
* if there's a pending resume request for the device, wake the device up.
* Next, make sure that all pending requests for the device have been flushed
* from pm_wq and wait for all runtime PM operations involving the device in
* progress to complete.
*
* Return value:
* 1, if there was a resume request pending and the device had to be woken up,
* 0, otherwise
*/
int pm_runtime_barrier(struct device *dev)
{
int retval = 0;
pm_runtime_get_noresume(dev);
spin_lock_irq(&dev->power.lock);
if (dev->power.request_pending
&& dev->power.request == RPM_REQ_RESUME) {
rpm_resume(dev, 0);
retval = 1;
}
__pm_runtime_barrier(dev);
spin_unlock_irq(&dev->power.lock);
pm_runtime_put_noidle(dev);
return retval;
}
EXPORT_SYMBOL_GPL(pm_runtime_barrier);
/**
* __pm_runtime_disable - Disable runtime PM of a device.
* @dev: Device to handle.
* @check_resume: If set, check if there's a resume request for the device.
*
* Increment power.disable_depth for the device and if it was zero previously,
* cancel all pending runtime PM requests for the device and wait for all
* operations in progress to complete. The device can be either active or
* suspended after its runtime PM has been disabled.
*
* If @check_resume is set and there's a resume request pending when
* __pm_runtime_disable() is called and power.disable_depth is zero, the
* function will wake up the device before disabling its runtime PM.
*/
void __pm_runtime_disable(struct device *dev, bool check_resume)
{
spin_lock_irq(&dev->power.lock);
if (dev->power.disable_depth > 0) {
dev->power.disable_depth++;
goto out;
}
/*
* Wake up the device if there's a resume request pending, because that
* means there probably is some I/O to process and disabling runtime PM
* shouldn't prevent the device from processing the I/O.
*/
if (check_resume && dev->power.request_pending
&& dev->power.request == RPM_REQ_RESUME) {
/*
* Prevent suspends and idle notifications from being carried
* out after we have woken up the device.
*/
pm_runtime_get_noresume(dev);
rpm_resume(dev, 0);
pm_runtime_put_noidle(dev);
}
/* Update time accounting before disabling PM-runtime. */
update_pm_runtime_accounting(dev);
if (!dev->power.disable_depth++)
__pm_runtime_barrier(dev);
out:
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(__pm_runtime_disable);
/**
* pm_runtime_enable - Enable runtime PM of a device.
* @dev: Device to handle.
*/
void pm_runtime_enable(struct device *dev)
{
unsigned long flags;
spin_lock_irqsave(&dev->power.lock, flags);
if (dev->power.disable_depth > 0) {
dev->power.disable_depth--;
/* About to enable runtime pm, set accounting_timestamp to now */
if (!dev->power.disable_depth)
dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
} else {
dev_warn(dev, "Unbalanced %s!\n", __func__);
}
WARN(!dev->power.disable_depth &&
dev->power.runtime_status == RPM_SUSPENDED &&
!dev->power.ignore_children &&
atomic_read(&dev->power.child_count) > 0,
"Enabling runtime PM for inactive device (%s) with active children\n",
dev_name(dev));
spin_unlock_irqrestore(&dev->power.lock, flags);
}
EXPORT_SYMBOL_GPL(pm_runtime_enable);
/**
* pm_runtime_forbid - Block runtime PM of a device.
* @dev: Device to handle.
*
* Increase the device's usage count and clear its power.runtime_auto flag,
* so that it cannot be suspended at run time until pm_runtime_allow() is called
* for it.
*/
void pm_runtime_forbid(struct device *dev)
{
spin_lock_irq(&dev->power.lock);
if (!dev->power.runtime_auto)
goto out;
dev->power.runtime_auto = false;
atomic_inc(&dev->power.usage_count);
rpm_resume(dev, 0);
out:
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(pm_runtime_forbid);
/**
* pm_runtime_allow - Unblock runtime PM of a device.
* @dev: Device to handle.
*
* Decrease the device's usage count and set its power.runtime_auto flag.
*/
void pm_runtime_allow(struct device *dev)
{
spin_lock_irq(&dev->power.lock);
if (dev->power.runtime_auto)
goto out;
dev->power.runtime_auto = true;
if (atomic_dec_and_test(&dev->power.usage_count))
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
else
trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
out:
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(pm_runtime_allow);
/**
* pm_runtime_no_callbacks - Ignore runtime PM callbacks for a device.
* @dev: Device to handle.
*
* Set the power.no_callbacks flag, which tells the PM core that this
* device is power-managed through its parent and has no runtime PM
* callbacks of its own. The runtime sysfs attributes will be removed.
*/
void pm_runtime_no_callbacks(struct device *dev)
{
spin_lock_irq(&dev->power.lock);
dev->power.no_callbacks = 1;
spin_unlock_irq(&dev->power.lock);
if (device_is_registered(dev))
rpm_sysfs_remove(dev);
}
EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks);
/**
* pm_runtime_irq_safe - Leave interrupts disabled during callbacks.
* @dev: Device to handle
*
* Set the power.irq_safe flag, which tells the PM core that the
* ->runtime_suspend() and ->runtime_resume() callbacks for this device should
* always be invoked with the spinlock held and interrupts disabled. It also
* causes the parent's usage counter to be permanently incremented, preventing
* the parent from runtime suspending -- otherwise an irq-safe child might have
* to wait for a non-irq-safe parent.
*/
void pm_runtime_irq_safe(struct device *dev)
{
if (dev->parent)
pm_runtime_get_sync(dev->parent);
spin_lock_irq(&dev->power.lock);
dev->power.irq_safe = 1;
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(pm_runtime_irq_safe);
/**
* update_autosuspend - Handle a change to a device's autosuspend settings.
* @dev: Device to handle.
* @old_delay: The former autosuspend_delay value.
* @old_use: The former use_autosuspend value.
*
* Prevent runtime suspend if the new delay is negative and use_autosuspend is
* set; otherwise allow it. Send an idle notification if suspends are allowed.
*
* This function must be called under dev->power.lock with interrupts disabled.
*/
static void update_autosuspend(struct device *dev, int old_delay, int old_use)
{
int delay = dev->power.autosuspend_delay;
/* Should runtime suspend be prevented now? */
if (dev->power.use_autosuspend && delay < 0) {
/* If it used to be allowed then prevent it. */
if (!old_use || old_delay >= 0) {
atomic_inc(&dev->power.usage_count);
rpm_resume(dev, 0);
} else {
trace_rpm_usage_rcuidle(dev, 0);
}
}
/* Runtime suspend should be allowed now. */
else {
/* If it used to be prevented then allow it. */
if (old_use && old_delay < 0)
atomic_dec(&dev->power.usage_count);
/* Maybe we can autosuspend now. */
rpm_idle(dev, RPM_AUTO);
}
}
/**
* pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value.
* @dev: Device to handle.
* @delay: Value of the new delay in milliseconds.
*
* Set the device's power.autosuspend_delay value. If it changes to negative
* and the power.use_autosuspend flag is set, prevent runtime suspends. If it
* changes the other way, allow runtime suspends.
*/
void pm_runtime_set_autosuspend_delay(struct device *dev, int delay)
{
int old_delay, old_use;
spin_lock_irq(&dev->power.lock);
old_delay = dev->power.autosuspend_delay;
old_use = dev->power.use_autosuspend;
dev->power.autosuspend_delay = delay;
update_autosuspend(dev, old_delay, old_use);
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay);
/**
* __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag.
* @dev: Device to handle.
* @use: New value for use_autosuspend.
*
* Set the device's power.use_autosuspend flag, and allow or prevent runtime
* suspends as needed.
*/
void __pm_runtime_use_autosuspend(struct device *dev, bool use)
{
int old_delay, old_use;
spin_lock_irq(&dev->power.lock);
old_delay = dev->power.autosuspend_delay;
old_use = dev->power.use_autosuspend;
dev->power.use_autosuspend = use;
update_autosuspend(dev, old_delay, old_use);
spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
/**
* pm_runtime_init - Initialize runtime PM fields in given device object.
* @dev: Device object to initialize.
*/
void pm_runtime_init(struct device *dev)
{
dev->power.runtime_status = RPM_SUSPENDED;
dev->power.idle_notification = false;
dev->power.disable_depth = 1;
atomic_set(&dev->power.usage_count, 0);
dev->power.runtime_error = 0;
atomic_set(&dev->power.child_count, 0);
pm_suspend_ignore_children(dev, false);
dev->power.runtime_auto = true;
dev->power.request_pending = false;
dev->power.request = RPM_REQ_NONE;
dev->power.deferred_resume = false;
INIT_WORK(&dev->power.work, pm_runtime_work);
dev->power.timer_expires = 0;
PM-runtime: Switch autosuspend over to using hrtimers PM-runtime uses the timer infrastructure for autosuspend. This implies that the minimum time before autosuspending a device is in the range of 1 tick included to 2 ticks excluded -On arm64 this means between 4ms and 8ms with default jiffies configuration -And on arm, it is between 10ms and 20ms These values are quite high for embedded systems which sometimes want the duration to be in the range of 1 ms. It is possible to switch autosuspend over to using hrtimers to get finer granularity for short durations and take advantage of slack to retain some margins and get long timeouts with minimum wakeups. On an arm64 platform that uses 1ms for autosuspending timeout of its GPU, idle power is reduced by 10% with hrtimer. The latency impact on arm64 hikey octo cores is: - mark_last_busy: from 1.11 us to 1.25 us - rpm_suspend: from 15.54 us to 15.38 us [Only the code path of rpm_suspend() that starts hrtimer has been measured.] arm64 image (arm64 default defconfig) decreases by around 3KB with following details: $ size vmlinux-timer text data bss dec hex filename 12034646 6869268 386840 19290754 1265a82 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 12030550 6870164 387032 19287746 1264ec2 vmlinux The latency impact on arm 32bits snowball dual cores is : - mark_last_busy: from 0.31 us usec to 0.77 us - rpm_suspend: from 6.83 us to 6.67 usec The increase of the image for snowball platform that I used for testing performance impact, is neglictable (244B). $ size vmlinux-timer text data bss dec hex filename 7157961 2119580 264120 9541661 91981d build-ux500/vmlinux size vmlinux-hrtimer text data bss dec hex filename 7157773 2119884 264248 9541905 919911 vmlinux-hrtimer And arm 32bits image (multi_v7_defconfig) increases by around 1.7KB with following details: $ size vmlinux-timer text data bss dec hex filename 13304443 6803420 402768 20510631 138f7a7 vmlinux $ size vmlinux-hrtimer text data bss dec hex filename 13304299 6805276 402768 20512343 138fe57 vmlinux Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2018-12-14 22:22:25 +08:00
hrtimer_init(&dev->power.suspend_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
dev->power.suspend_timer.function = pm_suspend_timer_fn;
init_waitqueue_head(&dev->power.wait_queue);
}
PM / runtime: Re-init runtime PM states at probe error and driver unbind There are two common expectations among several subsystems/drivers that deploys runtime PM support, but which isn't met by the driver core. Expectation 1) At ->probe() the subsystem/driver expects the runtime PM status of the device to be RPM_SUSPENDED, which is the initial status being assigned at device registration. This expectation is especially common among some of those subsystems/ drivers that manages devices with an attached PM domain, as those requires the ->runtime_resume() callback at the PM domain level to be invoked during ->probe(). Moreover these subsystems/drivers entirely relies on runtime PM resources being managed at the PM domain level, thus don't implement their own set of runtime PM callbacks. These are two scenarios that suffers from this unmet expectation. i) A failed ->probe() sequence requests probe deferral: ->probe() ... pm_runtime_enable() pm_runtime_get_sync() ... err: pm_runtime_put() pm_runtime_disable() ... As there are no guarantees that such sequence turns the runtime PM status of the device into RPM_SUSPENDED, the re-trying ->probe() may start with the status in RPM_ACTIVE. In such case the runtime PM core won't invoke the ->runtime_resume() callback because of a pm_runtime_get_sync(), as it considers the device to be already runtime resumed. ii) A driver re-bind sequence: At driver unbind, the subsystem/driver's >remove() callback invokes a sequence of runtime PM APIs, to undo actions during ->probe() and to put the device into low power state. ->remove() ... pm_runtime_put() pm_runtime_disable() ... Similar as in the failing ->probe() case, this sequence don't guarantee the runtime PM status of the device to turn into RPM_SUSPENDED. Trying to re-bind the driver thus causes the same issue as when re-trying ->probe(), in the probe deferral scenario. Expectation 2) Drivers that invokes the pm_runtime_irq_safe() API during ->probe(), triggers the runtime PM core to increase the usage count for the device's parent and permanently make it runtime resumed. The usage count is only dropped at device removal, which also allows it to be runtime suspended again. A re-trying ->probe() repeats the call to pm_runtime_irq_safe() and thus once more triggers the usage count of the device's parent to be increased. This leads to not only an imbalance issue of the usage count of the device's parent, but also to keep it runtime resumed permanently even if ->probe() fails. To address these issues, let's change the policy of the driver core to meet these expectations. More precisely, at ->probe() failures and driver unbind, restore the initial states of runtime PM. Although to still allow subsystem's to control PM for devices that doesn't ->probe() successfully, don't restore the initial states unless runtime PM is disabled. Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Kevin Hilman <khilman@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-11-18 18:48:39 +08:00
/**
* pm_runtime_reinit - Re-initialize runtime PM fields in given device object.
* @dev: Device object to re-initialize.
*/
void pm_runtime_reinit(struct device *dev)
{
if (!pm_runtime_enabled(dev)) {
if (dev->power.runtime_status == RPM_ACTIVE)
pm_runtime_set_suspended(dev);
if (dev->power.irq_safe) {
spin_lock_irq(&dev->power.lock);
dev->power.irq_safe = 0;
spin_unlock_irq(&dev->power.lock);
if (dev->parent)
pm_runtime_put(dev->parent);
}
}
}
/**
* pm_runtime_remove - Prepare for removing a device from device hierarchy.
* @dev: Device object being removed from device hierarchy.
*/
void pm_runtime_remove(struct device *dev)
{
__pm_runtime_disable(dev, false);
PM / runtime: Re-init runtime PM states at probe error and driver unbind There are two common expectations among several subsystems/drivers that deploys runtime PM support, but which isn't met by the driver core. Expectation 1) At ->probe() the subsystem/driver expects the runtime PM status of the device to be RPM_SUSPENDED, which is the initial status being assigned at device registration. This expectation is especially common among some of those subsystems/ drivers that manages devices with an attached PM domain, as those requires the ->runtime_resume() callback at the PM domain level to be invoked during ->probe(). Moreover these subsystems/drivers entirely relies on runtime PM resources being managed at the PM domain level, thus don't implement their own set of runtime PM callbacks. These are two scenarios that suffers from this unmet expectation. i) A failed ->probe() sequence requests probe deferral: ->probe() ... pm_runtime_enable() pm_runtime_get_sync() ... err: pm_runtime_put() pm_runtime_disable() ... As there are no guarantees that such sequence turns the runtime PM status of the device into RPM_SUSPENDED, the re-trying ->probe() may start with the status in RPM_ACTIVE. In such case the runtime PM core won't invoke the ->runtime_resume() callback because of a pm_runtime_get_sync(), as it considers the device to be already runtime resumed. ii) A driver re-bind sequence: At driver unbind, the subsystem/driver's >remove() callback invokes a sequence of runtime PM APIs, to undo actions during ->probe() and to put the device into low power state. ->remove() ... pm_runtime_put() pm_runtime_disable() ... Similar as in the failing ->probe() case, this sequence don't guarantee the runtime PM status of the device to turn into RPM_SUSPENDED. Trying to re-bind the driver thus causes the same issue as when re-trying ->probe(), in the probe deferral scenario. Expectation 2) Drivers that invokes the pm_runtime_irq_safe() API during ->probe(), triggers the runtime PM core to increase the usage count for the device's parent and permanently make it runtime resumed. The usage count is only dropped at device removal, which also allows it to be runtime suspended again. A re-trying ->probe() repeats the call to pm_runtime_irq_safe() and thus once more triggers the usage count of the device's parent to be increased. This leads to not only an imbalance issue of the usage count of the device's parent, but also to keep it runtime resumed permanently even if ->probe() fails. To address these issues, let's change the policy of the driver core to meet these expectations. More precisely, at ->probe() failures and driver unbind, restore the initial states of runtime PM. Although to still allow subsystem's to control PM for devices that doesn't ->probe() successfully, don't restore the initial states unless runtime PM is disabled. Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Kevin Hilman <khilman@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-11-18 18:48:39 +08:00
pm_runtime_reinit(dev);
}
/**
* pm_runtime_clean_up_links - Prepare links to consumers for driver removal.
* @dev: Device whose driver is going to be removed.
*
* Check links from this device to any consumers and if any of them have active
* runtime PM references to the device, drop the usage counter of the device
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
* (as many times as needed).
*
driver core: Remove device link creation limitation If device_link_add() is called for a consumer/supplier pair with an existing device link between them and the existing link's type is not in agreement with the flags passed to that function by its caller, NULL will be returned. That is seriously inconvenient, because it forces the callers of device_link_add() to worry about what others may or may not do even if that is not relevant to them for any other reasons. It turns out, however, that this limitation can be made go away relatively easily. The underlying observation is that if DL_FLAG_STATELESS has been passed to device_link_add() in flags for the given consumer/supplier pair at least once, calling either device_link_del() or device_link_remove() to release the link returned by it should work, but there are no other requirements associated with that flag. In turn, if at least one of the callers of device_link_add() for the given consumer/supplier pair has not passed DL_FLAG_STATELESS to it in flags, the driver core should track the status of the link and act on it as appropriate (ie. the link should be treated as "managed"). This means that DL_FLAG_STATELESS needs to be set for managed device links and it should be valid to call device_link_del() or device_link_remove() to drop references to them in certain sutiations. To allow that to happen, introduce a new (internal) device link flag called DL_FLAG_MANAGED and make device_link_add() set it automatically whenever DL_FLAG_STATELESS is not passed to it. Also make it take additional references to existing device links that were previously stateless (that is, with DL_FLAG_STATELESS set and DL_FLAG_MANAGED unset) and will need to be managed going forward and initialize their status (which has been DL_STATE_NONE so far). Accordingly, when a managed device link is dropped automatically by the driver core, make it clear DL_FLAG_MANAGED, reset the link's status back to DL_STATE_NONE and drop the reference to it associated with DL_FLAG_MANAGED instead of just deleting it right away (to allow it to stay around in case it still needs to be released explicitly by someone). With that, since setting DL_FLAG_STATELESS doesn't mean that the device link in question is not managed any more, replace all of the status-tracking checks against DL_FLAG_STATELESS with analogous checks against DL_FLAG_MANAGED and update the documentation to reflect these changes. While at it, make device_link_add() reject flags that it does not recognize, including DL_FLAG_MANAGED. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Saravana Kannan <saravanak@google.com> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> Review-by: Saravana Kannan <saravanak@google.com> Link: https://lore.kernel.org/r/2305283.AStDPdUUnE@kreacher Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-07-16 23:21:06 +08:00
* Links with the DL_FLAG_MANAGED flag unset are ignored.
*
* Since the device is guaranteed to be runtime-active at the point this is
* called, nothing else needs to be done here.
*
* Moreover, this is called after device_links_busy() has returned 'false', so
* the status of each link is guaranteed to be DL_STATE_SUPPLIER_UNBIND and
* therefore rpm_active can't be manipulated concurrently.
*/
void pm_runtime_clean_up_links(struct device *dev)
{
struct device_link *link;
int idx;
idx = device_links_read_lock();
list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
device_links_read_lock_held()) {
driver core: Remove device link creation limitation If device_link_add() is called for a consumer/supplier pair with an existing device link between them and the existing link's type is not in agreement with the flags passed to that function by its caller, NULL will be returned. That is seriously inconvenient, because it forces the callers of device_link_add() to worry about what others may or may not do even if that is not relevant to them for any other reasons. It turns out, however, that this limitation can be made go away relatively easily. The underlying observation is that if DL_FLAG_STATELESS has been passed to device_link_add() in flags for the given consumer/supplier pair at least once, calling either device_link_del() or device_link_remove() to release the link returned by it should work, but there are no other requirements associated with that flag. In turn, if at least one of the callers of device_link_add() for the given consumer/supplier pair has not passed DL_FLAG_STATELESS to it in flags, the driver core should track the status of the link and act on it as appropriate (ie. the link should be treated as "managed"). This means that DL_FLAG_STATELESS needs to be set for managed device links and it should be valid to call device_link_del() or device_link_remove() to drop references to them in certain sutiations. To allow that to happen, introduce a new (internal) device link flag called DL_FLAG_MANAGED and make device_link_add() set it automatically whenever DL_FLAG_STATELESS is not passed to it. Also make it take additional references to existing device links that were previously stateless (that is, with DL_FLAG_STATELESS set and DL_FLAG_MANAGED unset) and will need to be managed going forward and initialize their status (which has been DL_STATE_NONE so far). Accordingly, when a managed device link is dropped automatically by the driver core, make it clear DL_FLAG_MANAGED, reset the link's status back to DL_STATE_NONE and drop the reference to it associated with DL_FLAG_MANAGED instead of just deleting it right away (to allow it to stay around in case it still needs to be released explicitly by someone). With that, since setting DL_FLAG_STATELESS doesn't mean that the device link in question is not managed any more, replace all of the status-tracking checks against DL_FLAG_STATELESS with analogous checks against DL_FLAG_MANAGED and update the documentation to reflect these changes. While at it, make device_link_add() reject flags that it does not recognize, including DL_FLAG_MANAGED. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Saravana Kannan <saravanak@google.com> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> Review-by: Saravana Kannan <saravanak@google.com> Link: https://lore.kernel.org/r/2305283.AStDPdUUnE@kreacher Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-07-16 23:21:06 +08:00
if (!(link->flags & DL_FLAG_MANAGED))
continue;
driver core: Fix handling of runtime PM flags in device_link_add() After commit ead18c23c263 ("driver core: Introduce device links reference counting"), if there is a link between the given supplier and the given consumer already, device_link_add() will refcount it and return it unconditionally without updating its flags. It is possible, however, that the second (or any subsequent) caller of device_link_add() for the same consumer-supplier pair will pass DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags to it and the existing link may not behave as expected then. First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags at all, it needs to be set like during the original initialization of the link. Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags (in addition to DL_FLAG_PM_RUNTIME), the existing link should to be updated to reflect the "active" runtime PM configuration of the consumer-supplier pair and extra care must be taken here to avoid possible destructive races with runtime PM of the consumer. To that end, redefine the rpm_active field in struct device_link as a refcount, initialize it to 1 and make rpm_resume() (for the consumer) and device_link_add() increment it whenever they acquire a runtime PM reference on the supplier device. Accordingly, make rpm_suspend() (for the consumer) and pm_runtime_clean_up_links() decrement it and drop runtime PM references to the supplier device in a loop until rpm_active becones 1 again. Fixes: ead18c23c263 ("driver core: Introduce device links reference counting") Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-01 08:49:14 +08:00
while (refcount_dec_not_one(&link->rpm_active))
pm_runtime_put_noidle(dev);
}
device_links_read_unlock(idx);
}
/**
* pm_runtime_get_suppliers - Resume and reference-count supplier devices.
* @dev: Consumer device.
*/
void pm_runtime_get_suppliers(struct device *dev)
{
struct device_link *link;
int idx;
idx = device_links_read_lock();
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held())
driver core: Fix possible supplier PM-usage counter imbalance If a stateless device link to a certain supplier with DL_FLAG_PM_RUNTIME set in the flags is added and then removed by the consumer driver's probe callback, the supplier's PM-runtime usage counter will be nonzero after that which effectively causes the supplier to remain "always on" going forward. Namely, device_link_add() called to add the link invokes device_link_rpm_prepare() which notices that the consumer driver is probing, so it increments the supplier's PM-runtime usage counter with the assumption that the link will stay around until pm_runtime_put_suppliers() is called by driver_probe_device(), but if the link goes away before that point, the supplier's PM-runtime usage counter will remain nonzero. To prevent that from happening, first rework pm_runtime_get_suppliers() and pm_runtime_put_suppliers() to use the rpm_active refounts of device links and make the latter only drop rpm_active and the supplier's PM-runtime usage counter for each link by one, unless rpm_active is one already for it. Next, modify device_link_add() to bump up the new link's rpm_active refcount and the suppliers PM-runtime usage counter by two, to prevent pm_runtime_put_suppliers(), if it is called subsequently, from suspending the supplier prematurely (in case its PM-runtime usage counter goes down to 0 in there). Due to the way rpm_put_suppliers() works, this change does not affect runtime suspend of the consumer ends of new device links (or, generally, device links for which DL_FLAG_PM_RUNTIME has just been set). Fixes: e2f3cd831a28 ("driver core: Fix handling of runtime PM flags in device_link_add()") Reported-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Tested-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-12 20:08:10 +08:00
if (link->flags & DL_FLAG_PM_RUNTIME) {
driver core: Fix PM-runtime for links added during consumer probe Commit 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage counter imbalance") introduced a regression that causes suppliers to be suspended prematurely for device links added during consumer driver probe if the initial PM-runtime status of the consumer is "suspended" and the consumer is resumed after adding the link and before pm_runtime_put_suppliers() is called. In that case, pm_runtime_put_suppliers() will drop the rpm_active refcount for the link by one and (since rpm_active is equal to two after the preceding consumer resume) the supplier's PM-runtime usage counter will be decremented, which may cause the supplier to suspend even though the consumer's PM-runtime status is "active". For this reason, partially revert commit 4c06c4e6cf63 as the problem it tried to fix needs to be addressed somewhat differently, and change pm_runtime_get_suppliers() and pm_runtime_put_suppliers() so that the latter only drops rpm_active references acquired by the former. [This requires adding a new field to struct device_link, but I coulnd't find a cleaner way to address the issue that would work in all cases.] This causes pm_runtime_put_suppliers() to effectively ignore device links added during consumer probe, so device_link_add() doesn't need to worry about ensuring that suppliers will remain active after pm_runtime_put_suppliers() for links created with DL_FLAG_RPM_ACTIVE set and it only needs to bump up rpm_active by one for those links, so pm_runtime_active_link() is not necessary any more. Fixes: 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage counter imbalance") Reported-by: Jon Hunter <jonathanh@nvidia.com> Tested-by: Jon Hunter <jonathanh@nvidia.com> Tested-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-20 00:53:26 +08:00
link->supplier_preactivated = true;
driver core: Fix possible supplier PM-usage counter imbalance If a stateless device link to a certain supplier with DL_FLAG_PM_RUNTIME set in the flags is added and then removed by the consumer driver's probe callback, the supplier's PM-runtime usage counter will be nonzero after that which effectively causes the supplier to remain "always on" going forward. Namely, device_link_add() called to add the link invokes device_link_rpm_prepare() which notices that the consumer driver is probing, so it increments the supplier's PM-runtime usage counter with the assumption that the link will stay around until pm_runtime_put_suppliers() is called by driver_probe_device(), but if the link goes away before that point, the supplier's PM-runtime usage counter will remain nonzero. To prevent that from happening, first rework pm_runtime_get_suppliers() and pm_runtime_put_suppliers() to use the rpm_active refounts of device links and make the latter only drop rpm_active and the supplier's PM-runtime usage counter for each link by one, unless rpm_active is one already for it. Next, modify device_link_add() to bump up the new link's rpm_active refcount and the suppliers PM-runtime usage counter by two, to prevent pm_runtime_put_suppliers(), if it is called subsequently, from suspending the supplier prematurely (in case its PM-runtime usage counter goes down to 0 in there). Due to the way rpm_put_suppliers() works, this change does not affect runtime suspend of the consumer ends of new device links (or, generally, device links for which DL_FLAG_PM_RUNTIME has just been set). Fixes: e2f3cd831a28 ("driver core: Fix handling of runtime PM flags in device_link_add()") Reported-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Tested-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-12 20:08:10 +08:00
refcount_inc(&link->rpm_active);
pm_runtime_get_sync(link->supplier);
driver core: Fix possible supplier PM-usage counter imbalance If a stateless device link to a certain supplier with DL_FLAG_PM_RUNTIME set in the flags is added and then removed by the consumer driver's probe callback, the supplier's PM-runtime usage counter will be nonzero after that which effectively causes the supplier to remain "always on" going forward. Namely, device_link_add() called to add the link invokes device_link_rpm_prepare() which notices that the consumer driver is probing, so it increments the supplier's PM-runtime usage counter with the assumption that the link will stay around until pm_runtime_put_suppliers() is called by driver_probe_device(), but if the link goes away before that point, the supplier's PM-runtime usage counter will remain nonzero. To prevent that from happening, first rework pm_runtime_get_suppliers() and pm_runtime_put_suppliers() to use the rpm_active refounts of device links and make the latter only drop rpm_active and the supplier's PM-runtime usage counter for each link by one, unless rpm_active is one already for it. Next, modify device_link_add() to bump up the new link's rpm_active refcount and the suppliers PM-runtime usage counter by two, to prevent pm_runtime_put_suppliers(), if it is called subsequently, from suspending the supplier prematurely (in case its PM-runtime usage counter goes down to 0 in there). Due to the way rpm_put_suppliers() works, this change does not affect runtime suspend of the consumer ends of new device links (or, generally, device links for which DL_FLAG_PM_RUNTIME has just been set). Fixes: e2f3cd831a28 ("driver core: Fix handling of runtime PM flags in device_link_add()") Reported-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Tested-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-12 20:08:10 +08:00
}
device_links_read_unlock(idx);
}
/**
* pm_runtime_put_suppliers - Drop references to supplier devices.
* @dev: Consumer device.
*/
void pm_runtime_put_suppliers(struct device *dev)
{
struct device_link *link;
int idx;
idx = device_links_read_lock();
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held())
driver core: Fix PM-runtime for links added during consumer probe Commit 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage counter imbalance") introduced a regression that causes suppliers to be suspended prematurely for device links added during consumer driver probe if the initial PM-runtime status of the consumer is "suspended" and the consumer is resumed after adding the link and before pm_runtime_put_suppliers() is called. In that case, pm_runtime_put_suppliers() will drop the rpm_active refcount for the link by one and (since rpm_active is equal to two after the preceding consumer resume) the supplier's PM-runtime usage counter will be decremented, which may cause the supplier to suspend even though the consumer's PM-runtime status is "active". For this reason, partially revert commit 4c06c4e6cf63 as the problem it tried to fix needs to be addressed somewhat differently, and change pm_runtime_get_suppliers() and pm_runtime_put_suppliers() so that the latter only drops rpm_active references acquired by the former. [This requires adding a new field to struct device_link, but I coulnd't find a cleaner way to address the issue that would work in all cases.] This causes pm_runtime_put_suppliers() to effectively ignore device links added during consumer probe, so device_link_add() doesn't need to worry about ensuring that suppliers will remain active after pm_runtime_put_suppliers() for links created with DL_FLAG_RPM_ACTIVE set and it only needs to bump up rpm_active by one for those links, so pm_runtime_active_link() is not necessary any more. Fixes: 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage counter imbalance") Reported-by: Jon Hunter <jonathanh@nvidia.com> Tested-by: Jon Hunter <jonathanh@nvidia.com> Tested-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-02-20 00:53:26 +08:00
if (link->supplier_preactivated) {
link->supplier_preactivated = false;
if (refcount_dec_not_one(&link->rpm_active))
pm_runtime_put(link->supplier);
}
device_links_read_unlock(idx);
}
void pm_runtime_new_link(struct device *dev)
{
spin_lock_irq(&dev->power.lock);
dev->power.links_count++;
spin_unlock_irq(&dev->power.lock);
}
void pm_runtime_drop_link(struct device *dev)
{
spin_lock_irq(&dev->power.lock);
WARN_ON(dev->power.links_count == 0);
dev->power.links_count--;
spin_unlock_irq(&dev->power.lock);
}
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
static bool pm_runtime_need_not_resume(struct device *dev)
{
return atomic_read(&dev->power.usage_count) <= 1 &&
(atomic_read(&dev->power.child_count) == 0 ||
dev->power.ignore_children);
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
}
/**
* pm_runtime_force_suspend - Force a device into suspend state if needed.
* @dev: Device to suspend.
*
* Disable runtime PM so we safely can check the device's runtime PM status and
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
* if it is active, invoke its ->runtime_suspend callback to suspend it and
* change its runtime PM status field to RPM_SUSPENDED. Also, if the device's
* usage and children counters don't indicate that the device was in use before
* the system-wide transition under way, decrement its parent's children counter
* (if there is a parent). Keep runtime PM disabled to preserve the state
* unless we encounter errors.
*
* Typically this function may be invoked from a system suspend callback to make
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
* sure the device is put into low power state and it should only be used during
* system-wide PM transitions to sleep states. It assumes that the analogous
* pm_runtime_force_resume() will be used to resume the device.
*/
int pm_runtime_force_suspend(struct device *dev)
{
int (*callback)(struct device *);
int ret;
pm_runtime_disable(dev);
if (pm_runtime_status_suspended(dev))
return 0;
callback = RPM_GET_CALLBACK(dev, runtime_suspend);
ret = callback ? callback(dev) : 0;
if (ret)
goto err;
/*
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
* If the device can stay in suspend after the system-wide transition
* to the working state that will follow, drop the children counter of
* its parent, but set its status to RPM_SUSPENDED anyway in case this
* function will be called again for it in the meantime.
*/
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
if (pm_runtime_need_not_resume(dev))
pm_runtime_set_suspended(dev);
else
__update_runtime_status(dev, RPM_SUSPENDED);
return 0;
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
err:
pm_runtime_enable(dev);
return ret;
}
EXPORT_SYMBOL_GPL(pm_runtime_force_suspend);
/**
* pm_runtime_force_resume - Force a device into resume state if needed.
* @dev: Device to resume.
*
* Prior invoking this function we expect the user to have brought the device
* into low power state by a call to pm_runtime_force_suspend(). Here we reverse
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
* those actions and bring the device into full power, if it is expected to be
* used on system resume. In the other case, we defer the resume to be managed
* via runtime PM.
*
* Typically this function may be invoked from a system resume callback.
*/
int pm_runtime_force_resume(struct device *dev)
{
int (*callback)(struct device *);
int ret = 0;
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
goto out;
/*
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
* The value of the parent's children counter is correct already, so
* just update the status of the device.
*/
__update_runtime_status(dev, RPM_ACTIVE);
PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there's is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
2018-01-12 21:12:05 +08:00
callback = RPM_GET_CALLBACK(dev, runtime_resume);
ret = callback ? callback(dev) : 0;
if (ret) {
pm_runtime_set_suspended(dev);
goto out;
}
pm_runtime_mark_last_busy(dev);
out:
pm_runtime_enable(dev);
return ret;
}
EXPORT_SYMBOL_GPL(pm_runtime_force_resume);