diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 45d00e5fe14d..4d806b419606 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -36,7 +36,7 @@ static struct cpuidle_state *cpuidle_state_table; static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) { - *kt_before = ktime_get_real(); + *kt_before = ktime_get(); *in_purr = mfspr(SPRN_PURR); /* * Indicate to the HV that we are idle. Now would be @@ -50,7 +50,7 @@ static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before) get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr; get_lppaca()->idle = 0; - return ktime_to_us(ktime_sub(ktime_get_real(), kt_before)); + return ktime_to_us(ktime_sub(ktime_get(), kt_before)); } static int snooze_loop(struct cpuidle_device *dev, diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e8086c725305..f1a5da44591d 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -735,31 +735,18 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - ktime_t kt1, kt2; - s64 idle_time; struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; - local_irq_disable(); - - lapic_timer_state_broadcast(pr, cx, 1); - kt1 = ktime_get_real(); acpi_idle_do_entry(cx); - kt2 = ktime_get_real(); - idle_time = ktime_to_us(ktime_sub(kt2, kt1)); - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; - - local_irq_enable(); lapic_timer_state_broadcast(pr, cx, 0); return index; @@ -806,19 +793,12 @@ static int acpi_idle_enter_simple(struct 
cpuidle_device *dev, struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); - ktime_t kt1, kt2; - s64 idle_time_ns; - s64 idle_time; pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; - local_irq_disable(); - - if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -829,7 +809,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; - local_irq_enable(); return -EINVAL; } } @@ -843,22 +822,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); - kt1 = ktime_get_real(); /* Tell the scheduler that we are going deep-idle: */ sched_clock_idle_sleep_event(); acpi_idle_do_entry(cx); - kt2 = ktime_get_real(); - idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1)); - idle_time = idle_time_ns; - do_div(idle_time, NSEC_PER_USEC); - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; + sched_clock_idle_wakeup_event(0); - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(idle_time_ns); - - local_irq_enable(); if (cx->entry_method != ACPI_CSTATE_FFH) current_thread_info()->status |= TS_POLLING; @@ -883,13 +852,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); - ktime_t kt1, kt2; - s64 idle_time_ns; - s64 idle_time; - pr = __this_cpu_read(processors); - dev->last_residency = 0; if (unlikely(!pr)) return -EINVAL; @@ -899,16 +863,11 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, return drv->states[drv->safe_state_index].enter(dev, drv, drv->safe_state_index); } else { - local_irq_disable(); acpi_safe_halt(); - 
local_irq_enable(); return -EBUSY; } } - local_irq_disable(); - - if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -919,7 +878,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; - local_irq_enable(); return -EINVAL; } } @@ -934,7 +892,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, */ lapic_timer_state_broadcast(pr, cx, 1); - kt1 = ktime_get_real(); /* * disable bus master * bm_check implies we need ARB_DIS @@ -965,18 +922,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, c3_cpu_count--; raw_spin_unlock(&c3_lock); } - kt2 = ktime_get_real(); - idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1)); - idle_time = idle_time_ns; - do_div(idle_time, NSEC_PER_USEC); - /* Update device last_residency*/ - dev->last_residency = (int)idle_time; + sched_clock_idle_wakeup_event(0); - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(idle_time_ns); - - local_irq_enable(); if (cx->entry_method != ACPI_CSTATE_FFH) current_thread_info()->status |= TS_POLLING; @@ -987,6 +935,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct cpuidle_driver acpi_idle_driver = { .name = "acpi_idle", .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; /** diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index a76b689e553b..234ae651b38f 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -9,6 +9,15 @@ config CPU_IDLE If you're using an ACPI-enabled platform, you should say Y here. +config CPU_IDLE_MULTIPLE_DRIVERS + bool "Support multiple cpuidle drivers" + depends on CPU_IDLE + default n + help + Allows the cpuidle framework to use different drivers for each CPU. + This is useful if you have a system with different CPU latencies and + states. If unsure say N. 
+ config CPU_IDLE_GOV_LADDER bool depends on CPU_IDLE diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f15b8514a18..8df53dd8dbe1 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -68,7 +68,7 @@ static cpuidle_enter_t cpuidle_enter_ops; int cpuidle_play_dead(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int i, dead_state = -1; int power_usage = -1; @@ -109,8 +109,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, /* This can be moved to within driver enter routine * but that results in multiple copies of same code. */ - dev->states_usage[entered_state].time += - (unsigned long long)dev->last_residency; + dev->states_usage[entered_state].time += dev->last_residency; dev->states_usage[entered_state].usage++; } else { dev->last_residency = 0; @@ -128,7 +127,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; int next_state, entered_state; if (off) @@ -141,9 +140,15 @@ int cpuidle_idle_call(void) if (!dev || !dev->enabled) return -EBUSY; + drv = cpuidle_get_cpu_driver(dev); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(drv, dev); if (need_resched()) { + dev->last_residency = 0; + /* give the governor an opportunity to reflect on the outcome */ + if (cpuidle_curr_governor->reflect) + cpuidle_curr_governor->reflect(dev, next_state); local_irq_enable(); return 0; } @@ -308,15 +313,19 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} int cpuidle_enable_device(struct cpuidle_device *dev) { int ret, i; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; if (!dev) return 
-EINVAL; if (dev->enabled) return 0; + + drv = cpuidle_get_cpu_driver(dev); + if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->state_count) dev->state_count = drv->state_count; @@ -331,7 +340,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) poll_idle_init(drv); - if ((ret = cpuidle_add_state_sysfs(dev))) + ret = cpuidle_add_device_sysfs(dev); + if (ret) return ret; if (cpuidle_curr_governor->enable && @@ -352,7 +362,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return 0; fail_sysfs: - cpuidle_remove_state_sysfs(dev); + cpuidle_remove_device_sysfs(dev); return ret; } @@ -368,17 +378,20 @@ EXPORT_SYMBOL_GPL(cpuidle_enable_device); */ void cpuidle_disable_device(struct cpuidle_device *dev) { + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + if (!dev || !dev->enabled) return; - if (!cpuidle_get_driver() || !cpuidle_curr_governor) + + if (!drv || !cpuidle_curr_governor) return; dev->enabled = 0; if (cpuidle_curr_governor->disable) - cpuidle_curr_governor->disable(cpuidle_get_driver(), dev); + cpuidle_curr_governor->disable(drv, dev); - cpuidle_remove_state_sysfs(dev); + cpuidle_remove_device_sysfs(dev); enabled_devices--; } @@ -394,17 +407,14 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device); static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; - struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (!try_module_get(cpuidle_driver->owner)) + if (!try_module_get(drv->owner)) return -EINVAL; - init_completion(&dev->kobj_unregister); - per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(cpu_dev); + ret = cpuidle_add_sysfs(dev); if (ret) goto err_sysfs; @@ -416,12 +426,11 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) return 0; err_coupled: - cpuidle_remove_sysfs(cpu_dev); - 
wait_for_completion(&dev->kobj_unregister); + cpuidle_remove_sysfs(dev); err_sysfs: list_del(&dev->device_list); per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(cpuidle_driver->owner); + module_put(drv->owner); return ret; } @@ -460,8 +469,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); if (dev->registered == 0) return; @@ -470,16 +478,15 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); - cpuidle_remove_sysfs(cpu_dev); + cpuidle_remove_sysfs(dev); list_del(&dev->device_list); - wait_for_completion(&dev->kobj_unregister); per_cpu(cpuidle_devices, dev->cpu) = NULL; cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - module_put(cpuidle_driver->owner); + module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 76e7f696ad8c..ee97e9672ecf 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -5,8 +5,6 @@ #ifndef __DRIVER_CPUIDLE_H #define __DRIVER_CPUIDLE_H -#include - /* For internal use only */ extern struct cpuidle_governor *cpuidle_curr_governor; extern struct list_head cpuidle_governors; @@ -25,12 +23,15 @@ extern void cpuidle_uninstall_idle_handler(void); extern int cpuidle_switch_governor(struct cpuidle_governor *gov); /* sysfs */ + +struct device; + extern int cpuidle_add_interface(struct device *dev); extern void cpuidle_remove_interface(struct device *dev); -extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); -extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device); -extern int cpuidle_add_sysfs(struct device *dev); -extern void cpuidle_remove_sysfs(struct device *dev); +extern int 
cpuidle_add_device_sysfs(struct cpuidle_device *device); +extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device); +extern int cpuidle_add_sysfs(struct cpuidle_device *dev); +extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED bool cpuidle_state_is_coupled(struct cpuidle_device *dev, diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 87db3877fead..3af841fb397a 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -14,9 +14,10 @@ #include "cpuidle.h" -static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); -int cpuidle_driver_refcount; + +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); +static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); static void set_power_states(struct cpuidle_driver *drv) { @@ -40,11 +41,15 @@ static void set_power_states(struct cpuidle_driver *drv) drv->states[i].power_usage = -1 - i; } -/** - * cpuidle_register_driver - registers a driver - * @drv: the driver - */ -int cpuidle_register_driver(struct cpuidle_driver *drv) +static void __cpuidle_driver_init(struct cpuidle_driver *drv) +{ + drv->refcnt = 0; + + if (!drv->power_specified) + set_power_states(drv); +} + +static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) { if (!drv || !drv->state_count) return -EINVAL; @@ -52,31 +57,101 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (cpuidle_disabled()) return -ENODEV; - spin_lock(&cpuidle_driver_lock); - if (cpuidle_curr_driver) { - spin_unlock(&cpuidle_driver_lock); + if (__cpuidle_get_cpu_driver(cpu)) return -EBUSY; - } - if (!drv->power_specified) - set_power_states(drv); + __cpuidle_driver_init(drv); - cpuidle_curr_driver = drv; - - spin_unlock(&cpuidle_driver_lock); + __cpuidle_set_cpu_driver(drv, cpu); return 0; } -EXPORT_SYMBOL_GPL(cpuidle_register_driver); + +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, 
int cpu) +{ + if (drv != __cpuidle_get_cpu_driver(cpu)) + return; + + if (!WARN_ON(drv->refcnt > 0)) + __cpuidle_set_cpu_driver(NULL, cpu); +} + +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS + +static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + per_cpu(cpuidle_drivers, cpu) = drv; +} + +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return per_cpu(cpuidle_drivers, cpu); +} + +static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv) +{ + int cpu; + for_each_present_cpu(cpu) + __cpuidle_unregister_driver(drv, cpu); +} + +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv) +{ + int ret = 0; + int i, cpu; + + for_each_present_cpu(cpu) { + ret = __cpuidle_register_driver(drv, cpu); + if (ret) + break; + } + + if (ret) + for_each_present_cpu(i) { + if (i == cpu) + break; + __cpuidle_unregister_driver(drv, i); + } + + + return ret; +} + +int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + int ret; + + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); +} /** - * cpuidle_get_driver - return the current driver + * cpuidle_register_driver - registers a driver + * @drv: the driver */ -struct cpuidle_driver *cpuidle_get_driver(void) +int cpuidle_register_driver(struct cpuidle_driver *drv) { - return cpuidle_curr_driver; + int ret; + + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_all_cpu_driver(drv); + spin_unlock(&cpuidle_driver_lock); + + return ret; } -EXPORT_SYMBOL_GPL(cpuidle_get_driver); +EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** * cpuidle_unregister_driver - unregisters a driver @@ -84,29 +159,103 @@ 
EXPORT_SYMBOL_GPL(cpuidle_get_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { - if (drv != cpuidle_curr_driver) { - WARN(1, "invalid cpuidle_unregister_driver(%s)\n", - drv->name); - return; - } - spin_lock(&cpuidle_driver_lock); - - if (!WARN_ON(cpuidle_driver_refcount > 0)) - cpuidle_curr_driver = NULL; - + __cpuidle_unregister_all_cpu_driver(drv); spin_unlock(&cpuidle_driver_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); +#else + +static struct cpuidle_driver *cpuidle_curr_driver; + +static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + cpuidle_curr_driver = drv; +} + +static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return cpuidle_curr_driver; +} + +/** + * cpuidle_register_driver - registers a driver + * @drv: the driver + */ +int cpuidle_register_driver(struct cpuidle_driver *drv) +{ + int ret, cpu; + + cpu = get_cpu(); + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + put_cpu(); + + return ret; +} +EXPORT_SYMBOL_GPL(cpuidle_register_driver); + +/** + * cpuidle_unregister_driver - unregisters a driver + * @drv: the driver + */ +void cpuidle_unregister_driver(struct cpuidle_driver *drv) +{ + int cpu; + + cpu = get_cpu(); + spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + put_cpu(); +} +EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); +#endif + +/** + * cpuidle_get_driver - return the current driver + */ +struct cpuidle_driver *cpuidle_get_driver(void) +{ + struct cpuidle_driver *drv; + int cpu; + + cpu = get_cpu(); + drv = __cpuidle_get_cpu_driver(cpu); + put_cpu(); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_driver); + +/** + * cpuidle_get_cpu_driver - return the driver tied with a cpu + */ +struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv; + + if (!dev) + return NULL; + + 
spin_lock(&cpuidle_driver_lock); + drv = __cpuidle_get_cpu_driver(dev->cpu); + spin_unlock(&cpuidle_driver_lock); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); + struct cpuidle_driver *cpuidle_driver_ref(void) { struct cpuidle_driver *drv; spin_lock(&cpuidle_driver_lock); - drv = cpuidle_curr_driver; - cpuidle_driver_refcount++; + drv = cpuidle_get_driver(); + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; @@ -114,10 +263,12 @@ struct cpuidle_driver *cpuidle_driver_ref(void) void cpuidle_driver_unref(void) { + struct cpuidle_driver *drv = cpuidle_get_driver(); + spin_lock(&cpuidle_driver_lock); - if (!WARN_ON(cpuidle_driver_refcount <= 0)) - cpuidle_driver_refcount--; + if (drv && !WARN_ON(drv->refcnt <= 0)) + drv->refcnt--; spin_unlock(&cpuidle_driver_lock); } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5b1f2c372c1f..bd40b943b6db 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,6 +28,13 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 +/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ +#define MAX_DEVIATION 60 + +static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); +static DEFINE_PER_CPU(int, hrtimer_status); +/* menu hrtimer mode */ +enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -109,6 +116,13 @@ * */ +/* + * The C-state residency is so long that it is worthwhile to exit + * from the shallow C-state and re-enter into a deeper C-state. 
+ */ +static unsigned int perfect_cstate_ms __read_mostly = 30; +module_param(perfect_cstate_ms, uint, 0000); + struct menu_device { int last_state_idx; int needs_update; @@ -191,40 +205,102 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } +/* Cancel the hrtimer if it is not triggered yet */ +void menu_hrtimer_cancel(void) +{ + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); + + /* The timer has not expired yet */ + if (per_cpu(hrtimer_status, cpu)) { + hrtimer_cancel(hrtmr); + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + } +} +EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); + +/* Callback invoked when the menu hrtimer is triggered */ +static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) +{ + int cpu = smp_processor_id(); + struct menu_device *data = &per_cpu(menu_devices, cpu); + + /* In general case, the expected residency is much larger than + * deepest C-state target residency, but prediction logic still + * predicts a small predicted residency, so the prediction + * history is totally broken if the timer is triggered. + * So reset the correction factor. + */ + if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) + data->correction_factor[data->bucket] = RESOLUTION * DECAY; + + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + + return HRTIMER_NORESTART; +} + /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static void detect_repeating_patterns(struct menu_device *data) +static u32 get_typical_interval(struct menu_device *data) { - int i; - uint64_t avg = 0; - uint64_t stddev = 0; /* contains the square of the std deviation */ + int i = 0, divisor = 0; + uint64_t max = 0, avg = 0, stddev = 0; + int64_t thresh = LLONG_MAX; /* Discard outliers above this value. 
*/ + unsigned int ret = 0; + +again: /* first calculate average and standard deviation of the past */ - for (i = 0; i < INTERVALS; i++) - avg += data->intervals[i]; - avg = avg / INTERVALS; - - /* if the avg is beyond the known next tick, it's worthless */ - if (avg > data->expected_us) - return; - - for (i = 0; i < INTERVALS; i++) - stddev += (data->intervals[i] - avg) * - (data->intervals[i] - avg); - - stddev = stddev / INTERVALS; + max = avg = divisor = stddev = 0; + for (i = 0; i < INTERVALS; i++) { + int64_t value = data->intervals[i]; + if (value <= thresh) { + avg += value; + divisor++; + if (value > max) + max = value; + } + } + do_div(avg, divisor); + for (i = 0; i < INTERVALS; i++) { + int64_t value = data->intervals[i]; + if (value <= thresh) { + int64_t diff = value - avg; + stddev += diff * diff; + } + } + do_div(stddev, divisor); + stddev = int_sqrt(stddev); /* - * now.. if stddev is small.. then assume we have a - * repeating pattern and predict we keep doing this. + * If we have outliers to the upside in our distribution, discard + * those by setting the threshold to exclude these outliers, then + * calculate the average and standard deviation again. Once we get + * down to the bottom 3/4 of our samples, stop excluding samples. + * + * This can deal with workloads that have long pauses interspersed + * with sporadic activity with a bunch of short pauses. + * + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. 
*/ - - if (avg && stddev < STDDEV_THRESH) + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { data->predicted_us = avg; + ret = 1; + return ret; + + } else if ((divisor * 4) > INTERVALS * 3) { + /* Exclude the max interval */ + thresh = max - 1; + goto again; + } + + return ret; } /** @@ -240,6 +316,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; + int repeat = 0, low_predicted = 0; + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -274,7 +353,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - detect_repeating_patterns(data); + repeat = get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -295,8 +374,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) + if (s->target_residency > data->predicted_us) { + low_predicted = 1; continue; + } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -309,6 +390,44 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } + /* not deepest C-state chosen for low predicted residency */ + if (low_predicted) { + unsigned int timer_us = 0; + unsigned int perfect_us = 0; + + /* + * Set a timer to detect whether this sleep is much + * longer than repeat mode predicted. If the timer + * triggers, the code will evaluate whether to put + * the CPU into a deeper C-state. + * The timer is cancelled on CPU wakeup. 
+ */ + timer_us = 2 * (data->predicted_us + MAX_DEVIATION); + + perfect_us = perfect_cstate_ms * 1000; + + if (repeat && (4 * timer_us < data->expected_us)) { + RCU_NONIDLE(hrtimer_start(hrtmr, + ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED)); + /* In repeat case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; + } else if (perfect_us < data->expected_us) { + /* + * The next timer is long. This could be because + * we did not make a useful prediction. + * In that case, it makes sense to re-enter + * into a deeper C-state after some time. + */ + RCU_NONIDLE(hrtimer_start(hrtmr, + ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED)); + /* In general case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; + } + + } + return data->last_state_idx; } @@ -399,6 +518,9 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); + struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 5f809e337b89..340942946106 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -297,6 +298,13 @@ static struct attribute *cpuidle_state_default_attrs[] = { NULL }; +struct cpuidle_state_kobj { + struct cpuidle_state *state; + struct cpuidle_state_usage *state_usage; + struct completion kobj_unregister; + struct kobject kobj; +}; + #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) @@ -356,17 +364,17 @@ static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i) } 
/** - * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes + * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes * @device: the target device */ -int cpuidle_add_state_sysfs(struct cpuidle_device *device) +static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ - for (i = 0; i < device->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); if (!kobj) goto error_state; @@ -374,8 +382,8 @@ int cpuidle_add_state_sysfs(struct cpuidle_device *device) kobj->state_usage = &device->states_usage[i]; init_completion(&kobj->kobj_unregister); - ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, &device->kobj, - "state%d", i); + ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, + &device->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -393,10 +401,10 @@ error_state: } /** - * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes + * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes * @device: the target device */ -void cpuidle_remove_state_sysfs(struct cpuidle_device *device) +static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) { int i; @@ -404,17 +412,179 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device) cpuidle_free_state_kobj(device, i); } +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj) +#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr) + +#define define_one_driver_ro(_name, show) \ + static struct cpuidle_driver_attr attr_driver_##_name = \ + __ATTR(_name, 0644, show, NULL) + +struct cpuidle_driver_kobj { + struct cpuidle_driver *drv; + struct completion 
kobj_unregister; + struct kobject kobj; +}; + +struct cpuidle_driver_attr { + struct attribute attr; + ssize_t (*show)(struct cpuidle_driver *, char *); + ssize_t (*store)(struct cpuidle_driver *, const char *, size_t); +}; + +static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf) +{ + ssize_t ret; + + spin_lock(&cpuidle_driver_lock); + ret = sprintf(buf, "%s\n", drv ? drv->name : "none"); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +static void cpuidle_driver_sysfs_release(struct kobject *kobj) +{ + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + complete(&driver_kobj->kobj_unregister); +} + +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, + char * buf) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->show) + ret = dattr->show(driver_kobj->drv, buf); + + return ret; +} + +static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->store) + ret = dattr->store(driver_kobj->drv, buf, size); + + return ret; +} + +define_one_driver_ro(name, show_driver_name); + +static const struct sysfs_ops cpuidle_driver_sysfs_ops = { + .show = cpuidle_driver_show, + .store = cpuidle_driver_store, +}; + +static struct attribute *cpuidle_driver_default_attrs[] = { + &attr_driver_name.attr, + NULL +}; + +static struct kobj_type ktype_driver_cpuidle = { + .sysfs_ops = &cpuidle_driver_sysfs_ops, + .default_attrs = cpuidle_driver_default_attrs, + .release = cpuidle_driver_sysfs_release, +}; + +/** + * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute + * @device: the target device + */ +static int cpuidle_add_driver_sysfs(struct cpuidle_device 
*dev) +{ + struct cpuidle_driver_kobj *kdrv; + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int ret; + + kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL); + if (!kdrv) + return -ENOMEM; + + kdrv->drv = drv; + init_completion(&kdrv->kobj_unregister); + + ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, + &dev->kobj, "driver"); + if (ret) { + kfree(kdrv); + return ret; + } + + kobject_uevent(&kdrv->kobj, KOBJ_ADD); + dev->kobj_driver = kdrv; + + return ret; +} + +/** + * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute + * @device: the target device + */ +static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + struct cpuidle_driver_kobj *kdrv = dev->kobj_driver; + kobject_put(&kdrv->kobj); + wait_for_completion(&kdrv->kobj_unregister); + kfree(kdrv); +} +#else +static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) +{ + return 0; +} + +static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + ; +} +#endif + +/** + * cpuidle_add_device_sysfs - adds device specific sysfs attributes + * @device: the target device + */ +int cpuidle_add_device_sysfs(struct cpuidle_device *device) +{ + int ret; + + ret = cpuidle_add_state_sysfs(device); + if (ret) + return ret; + + ret = cpuidle_add_driver_sysfs(device); + if (ret) + cpuidle_remove_state_sysfs(device); + return ret; +} + +/** + * cpuidle_remove_device_sysfs : removes device specific sysfs attributes + * @device : the target device + */ +void cpuidle_remove_device_sysfs(struct cpuidle_device *device) +{ + cpuidle_remove_driver_sysfs(device); + cpuidle_remove_state_sysfs(device); +} + /** * cpuidle_add_sysfs - creates a sysfs instance for the target device * @dev: the target device */ -int cpuidle_add_sysfs(struct device *cpu_dev) +int cpuidle_add_sysfs(struct cpuidle_device *dev) { - int cpu = cpu_dev->id; - struct cpuidle_device *dev; + struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - dev = 
per_cpu(cpuidle_devices, cpu); + init_completion(&dev->kobj_unregister); + error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, "cpuidle"); if (!error) @@ -426,11 +596,8 @@ int cpuidle_add_sysfs(struct device *cpu_dev) * cpuidle_remove_sysfs - deletes a sysfs instance on the target device * @dev: the target device */ -void cpuidle_remove_sysfs(struct device *cpu_dev) +void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - int cpu = cpu_dev->id; - struct cpuidle_device *dev; - - dev = per_cpu(cpuidle_devices, cpu); kobject_put(&dev->kobj); + wait_for_completion(&dev->kobj_unregister); } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b0f6b4c8ee14..c49c04d9c2b0 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,7 +56,6 @@ #include #include #include -#include /* ktime_get_real() */ #include #include #include @@ -72,6 +71,7 @@ static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", .owner = THIS_MODULE, + .en_core_tk_irqen = 1, }; /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; @@ -281,8 +281,6 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); unsigned int cstate; - ktime_t kt_before, kt_after; - s64 usec_delta; int cpu = smp_processor_id(); cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; @@ -297,8 +295,6 @@ static int intel_idle(struct cpuidle_device *dev, if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); - kt_before = ktime_get_real(); - stop_critical_timings(); if (!need_resched()) { @@ -310,17 +306,9 @@ static int intel_idle(struct cpuidle_device *dev, start_critical_timings(); - kt_after = ktime_get_real(); - usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before)); - - local_irq_enable(); - if 
(!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - /* Update cpuidle counters */ - dev->last_residency = (int)usec_delta; - return index; } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 279b1eaa8b73..3711b34dc4f9 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -82,13 +82,6 @@ cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data) st_usage->driver_data = data; } -struct cpuidle_state_kobj { - struct cpuidle_state *state; - struct cpuidle_state_usage *state_usage; - struct completion kobj_unregister; - struct kobject kobj; -}; - struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; @@ -98,7 +91,7 @@ struct cpuidle_device { int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; - + struct cpuidle_driver_kobj *kobj_driver; struct list_head device_list; struct kobject kobj; struct completion kobj_unregister; @@ -131,6 +124,7 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) struct cpuidle_driver { const char *name; struct module *owner; + int refcnt; unsigned int power_specified:1; /* set to 1 to use the core cpuidle time keeping (for all states). 
*/ @@ -163,6 +157,10 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)); extern int cpuidle_play_dead(void); +extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu); +extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu); + #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -189,7 +187,6 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)) { return -ENODEV; } static inline int cpuidle_play_dead(void) {return -ENODEV; } - #endif #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/include/linux/tick.h b/include/linux/tick.h index f37fceb69b73..1a6567b48492 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -142,4 +142,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +# ifdef CONFIG_CPU_IDLE_GOV_MENU +extern void menu_hrtimer_cancel(void); +# else +static inline void menu_hrtimer_cancel(void) {} +# endif /* CONFIG_CPU_IDLE_GOV_MENU */ + #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a40260885265..6f337068dc4c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -526,6 +526,8 @@ void tick_nohz_irq_exit(void) if (!ts->inidle) return; + /* Cancel the timer because the CPU has already woken up from the C-state */ + menu_hrtimer_cancel(); __tick_nohz_idle_enter(ts); } @@ -621,6 +623,8 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; + /* Cancel the timer because the CPU has already woken up from the C-state */ + menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get();