diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24ff26a38ade..5fff00c70de0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states + * with P/T states and does not stop in deep C-states. + * + * It is also reliable across cores and sockets. (but not across + * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); + sched_clock_stable = 1; } } diff --git a/include/linux/sched.h b/include/linux/sched.h index 426666dd8203..7702cb166e08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1672,6 +1672,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) return set_cpus_allowed_ptr(p, &new_mask); } +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +extern int sched_clock_stable; +#endif + extern unsigned long long sched_clock(void); extern void sched_clock_init(void); diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index db69174b1178..7ec82c1c61c5 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -24,12 +24,12 @@ * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat * consistent between cpus (never more than 2 jiffies difference). */ -#include -#include #include -#include -#include #include +#include +#include +#include +#include /* * Scheduler clock - returns current time in nanosec units. @@ -44,6 +44,10 @@ unsigned long long __attribute__((weak)) sched_clock(void) static __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +__read_mostly int sched_clock_stable; +#else +static const int sched_clock_stable = 1; +#endif struct sched_clock_data { /* @@ -88,7 +92,7 @@ void sched_clock_init(void) } /* - * min,max except they take wrapping into account + * min, max except they take wrapping into account */ static inline u64 wrap_min(u64 x, u64 y) @@ -117,10 +121,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) if (unlikely(delta < 0)) delta = 0; + if (unlikely(!sched_clock_running)) + return 0ull; + /* * scd->clock = clamp(scd->tick_gtod + delta, - * max(scd->tick_gtod, scd->clock), - * scd->tick_gtod + TICK_NSEC); + * max(scd->tick_gtod, scd->clock), + * scd->tick_gtod + TICK_NSEC); */ clock = scd->tick_gtod + delta; @@ -149,8 +156,13 @@ static void lock_double_clock(struct sched_clock_data *data1, u64 sched_clock_cpu(int cpu) { - struct sched_clock_data *scd = cpu_sdc(cpu); u64 now, clock, this_clock, remote_clock; + struct sched_clock_data *scd; + + if (sched_clock_stable) + return sched_clock(); + + scd = cpu_sdc(cpu); /* * Normally this is not called in NMI context - but if it is, @@ -201,6 +213,8 @@ u64 sched_clock_cpu(int cpu) return clock; } +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + void sched_clock_tick(void) { struct sched_clock_data *scd = this_scd(); @@ -243,22 +257,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); -#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ - -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - -u64 sched_clock_cpu(int cpu) -{ - if (unlikely(!sched_clock_running)) - return 0; - - return sched_clock(); -} - -#endif +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ unsigned long long cpu_clock(int cpu) {