2008-10-23 13:26:29 +08:00
|
|
|
#ifndef _ASM_X86_TIMER_H
|
|
|
|
#define _ASM_X86_TIMER_H
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
2005-09-04 06:57:07 +08:00
|
|
|
#include <linux/pm.h>
|
2008-01-30 20:30:06 +08:00
|
|
|
#include <linux/percpu.h>
|
2009-02-23 07:34:39 +08:00
|
|
|
#include <linux/interrupt.h>
|
sched/clock, x86: Use mul_u64_u32_shr() for native_sched_clock()
Use mul_u64_u32_shr() so that x86_64 can use a single 64x64->128 mul.
Before:
0000000000000560 <native_sched_clock>:
560: 44 8b 1d 00 00 00 00 mov 0x0(%rip),%r11d # 567 <native_sched_clock+0x7>
567: 55 push %rbp
568: 48 89 e5 mov %rsp,%rbp
56b: 45 85 db test %r11d,%r11d
56e: 75 4f jne 5bf <native_sched_clock+0x5f>
570: 0f 31 rdtsc
572: 89 c0 mov %eax,%eax
574: 48 c1 e2 20 shl $0x20,%rdx
578: 48 c7 c1 00 00 00 00 mov $0x0,%rcx
57f: 48 09 c2 or %rax,%rdx
582: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
589: 65 8b 04 25 00 00 00 mov %gs:0x0,%eax
590: 00
591: 48 98 cltq
593: 48 8b 34 c5 00 00 00 mov 0x0(,%rax,8),%rsi
59a: 00
59b: 48 89 d0 mov %rdx,%rax
59e: 81 e2 ff 03 00 00 and $0x3ff,%edx
5a4: 48 c1 e8 0a shr $0xa,%rax
5a8: 48 0f af 14 0e imul (%rsi,%rcx,1),%rdx
5ad: 48 0f af 04 0e imul (%rsi,%rcx,1),%rax
5b2: 5d pop %rbp
5b3: 48 03 04 3e add (%rsi,%rdi,1),%rax
5b7: 48 c1 ea 0a shr $0xa,%rdx
5bb: 48 01 d0 add %rdx,%rax
5be: c3 retq
After:
0000000000000550 <native_sched_clock>:
550: 8b 3d 00 00 00 00 mov 0x0(%rip),%edi # 556 <native_sched_clock+0x6>
556: 55 push %rbp
557: 48 89 e5 mov %rsp,%rbp
55a: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
55e: 85 ff test %edi,%edi
560: 75 2c jne 58e <native_sched_clock+0x3e>
562: 0f 31 rdtsc
564: 89 c0 mov %eax,%eax
566: 48 c1 e2 20 shl $0x20,%rdx
56a: 48 09 c2 or %rax,%rdx
56d: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
574: 00 00
576: 89 c0 mov %eax,%eax
578: 48 f7 e2 mul %rdx
57b: 65 48 8b 0c 25 00 00 mov %gs:0x0,%rcx
582: 00 00
584: c9 leaveq
585: 48 0f ac d0 0a shrd $0xa,%rdx,%rax
58a: 48 01 c8 add %rcx,%rax
58d: c3 retq
MAINLINE POST
sched_clock_stable: 1 1
(cold) sched_clock: 329841 331312
(cold) local_clock: 301773 310296
(warm) sched_clock: 38375 38247
(warm) local_clock: 100371 102713
(warm) rdtsc: 27340 27289
sched_clock_stable: 0 0
(cold) sched_clock: 382634 372706
(cold) local_clock: 396890 399275
(warm) sched_clock: 38194 38124
(warm) local_clock: 143452 148698
(warm) rdtsc: 27345 27365
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-piu203ses5y1g36bnyw2n16x@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-11-30 01:04:39 +08:00
|
|
|
#include <linux/math64.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * tick_nsec scaled down by a factor of 1000.
 * NOTE(review): presumably converts the tick length from nanoseconds to
 * microseconds; tick_nsec is defined elsewhere -- confirm its unit.
 */
#define TICK_SIZE (tick_nsec / 1000)
|
2007-03-05 16:30:35 +08:00
|
|
|
|
|
|
|
unsigned long long native_sched_clock(void);
|
2009-08-20 21:54:21 +08:00
|
|
|
extern int recalibrate_cpu_khz(void);
|
2007-03-05 16:30:35 +08:00
|
|
|
|
2008-07-22 00:22:51 +08:00
|
|
|
extern int no_timer_check;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-01-30 20:30:06 +08:00
|
|
|
/* Accelerators for sched_clock()
|
2007-07-18 09:37:04 +08:00
|
|
|
* convert from cycles(64bits) => nanoseconds (64bits)
|
|
|
|
* basic equation:
|
|
|
|
* ns = cycles / (freq / ns_per_sec)
|
|
|
|
* ns = cycles * (ns_per_sec / freq)
|
|
|
|
* ns = cycles * (10^9 / (cpu_khz * 10^3))
|
|
|
|
* ns = cycles * (10^6 / cpu_khz)
|
|
|
|
*
|
|
|
|
* Then we use scaling math (suggested by george@mvista.com) to get:
|
|
|
|
* ns = cycles * (10^6 * SC / cpu_khz) / SC
|
|
|
|
* ns = cycles * cyc2ns_scale / SC
|
|
|
|
*
|
|
|
|
* And since SC is a constant power of two, we can convert the div
|
|
|
|
* into a shift.
|
|
|
|
*
|
2008-01-30 20:30:06 +08:00
|
|
|
* We can use khz divisor instead of mhz to keep a better precision, since
|
2007-07-18 09:37:04 +08:00
|
|
|
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
|
|
|
|
* (mathieu.desnoyers@polymtl.ca)
|
|
|
|
*
|
|
|
|
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
2011-11-16 06:12:06 +08:00
|
|
|
*
|
|
|
|
* In:
|
|
|
|
*
|
|
|
|
* ns = cycles * cyc2ns_scale / SC
|
|
|
|
*
|
|
|
|
* Although we may still have enough bits to store the value of ns,
|
|
|
|
* in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
|
|
|
|
* leading to an incorrect result.
|
|
|
|
*
|
|
|
|
* To avoid this, we can decompose 'cycles' into quotient and remainder
|
|
|
|
* of division by SC. Then,
|
|
|
|
*
|
|
|
|
* ns = (quot * SC + rem) * cyc2ns_scale / SC
|
|
|
|
* = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
|
|
|
|
*
|
|
|
|
* - sqazi@google.com
|
2007-07-18 09:37:04 +08:00
|
|
|
*/
|
2008-01-30 20:30:06 +08:00
|
|
|
|
|
|
|
/*
 * Per-CPU multiplier used by __cycles_2_ns(): the cycle count is multiplied
 * by this value and then shifted right by CYC2NS_SCALE_FACTOR. This is the
 * "cyc2ns_scale" term of the scaling math described above.
 */
DECLARE_PER_CPU(unsigned long, cyc2ns);
|
2009-06-17 03:34:17 +08:00
|
|
|
/*
 * Per-CPU value that __cycles_2_ns() adds to the scaled cycle count to form
 * its nanosecond result.
 */
DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
|
2007-07-18 09:37:04 +08:00
|
|
|
|
|
|
|
/*
 * Shift count for the cycles -> ns conversion: SC in the scaling math above
 * is 2^CYC2NS_SCALE_FACTOR, so the division by SC becomes a right shift by
 * this many bits in __cycles_2_ns().
 */
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
|
|
|
|
2008-01-30 20:30:06 +08:00
|
|
|
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
|
2007-07-18 09:37:04 +08:00
|
|
|
{
|
sched/clock, x86: Use mul_u64_u32_shr() for native_sched_clock()
Use mul_u64_u32_shr() so that x86_64 can use a single 64x64->128 mul.
Before:
0000000000000560 <native_sched_clock>:
560: 44 8b 1d 00 00 00 00 mov 0x0(%rip),%r11d # 567 <native_sched_clock+0x7>
567: 55 push %rbp
568: 48 89 e5 mov %rsp,%rbp
56b: 45 85 db test %r11d,%r11d
56e: 75 4f jne 5bf <native_sched_clock+0x5f>
570: 0f 31 rdtsc
572: 89 c0 mov %eax,%eax
574: 48 c1 e2 20 shl $0x20,%rdx
578: 48 c7 c1 00 00 00 00 mov $0x0,%rcx
57f: 48 09 c2 or %rax,%rdx
582: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
589: 65 8b 04 25 00 00 00 mov %gs:0x0,%eax
590: 00
591: 48 98 cltq
593: 48 8b 34 c5 00 00 00 mov 0x0(,%rax,8),%rsi
59a: 00
59b: 48 89 d0 mov %rdx,%rax
59e: 81 e2 ff 03 00 00 and $0x3ff,%edx
5a4: 48 c1 e8 0a shr $0xa,%rax
5a8: 48 0f af 14 0e imul (%rsi,%rcx,1),%rdx
5ad: 48 0f af 04 0e imul (%rsi,%rcx,1),%rax
5b2: 5d pop %rbp
5b3: 48 03 04 3e add (%rsi,%rdi,1),%rax
5b7: 48 c1 ea 0a shr $0xa,%rdx
5bb: 48 01 d0 add %rdx,%rax
5be: c3 retq
After:
0000000000000550 <native_sched_clock>:
550: 8b 3d 00 00 00 00 mov 0x0(%rip),%edi # 556 <native_sched_clock+0x6>
556: 55 push %rbp
557: 48 89 e5 mov %rsp,%rbp
55a: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
55e: 85 ff test %edi,%edi
560: 75 2c jne 58e <native_sched_clock+0x3e>
562: 0f 31 rdtsc
564: 89 c0 mov %eax,%eax
566: 48 c1 e2 20 shl $0x20,%rdx
56a: 48 09 c2 or %rax,%rdx
56d: 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
574: 00 00
576: 89 c0 mov %eax,%eax
578: 48 f7 e2 mul %rdx
57b: 65 48 8b 0c 25 00 00 mov %gs:0x0,%rcx
582: 00 00
584: c9 leaveq
585: 48 0f ac d0 0a shrd $0xa,%rdx,%rax
58a: 48 01 c8 add %rcx,%rax
58d: c3 retq
MAINLINE POST
sched_clock_stable: 1 1
(cold) sched_clock: 329841 331312
(cold) local_clock: 301773 310296
(warm) sched_clock: 38375 38247
(warm) local_clock: 100371 102713
(warm) rdtsc: 27340 27289
sched_clock_stable: 0 0
(cold) sched_clock: 382634 372706
(cold) local_clock: 396890 399275
(warm) sched_clock: 38194 38124
(warm) local_clock: 143452 148698
(warm) rdtsc: 27345 27365
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-piu203ses5y1g36bnyw2n16x@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-11-30 01:04:39 +08:00
|
|
|
unsigned long long ns = this_cpu_read(cyc2ns_offset);
|
|
|
|
ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
|
2009-06-17 03:34:17 +08:00
|
|
|
return ns;
|
2007-07-18 09:37:04 +08:00
|
|
|
}
|
|
|
|
|
2008-01-30 20:30:06 +08:00
|
|
|
/*
 * Interrupt-safe wrapper around __cycles_2_ns(): local interrupts are
 * disabled for the duration of the conversion and the previous interrupt
 * state is restored before the nanosecond value is returned.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	unsigned long irq_state;
	unsigned long long result;

	local_irq_save(irq_state);
	result = __cycles_2_ns(cyc);
	local_irq_restore(irq_state);

	return result;
}
|
2007-07-18 09:37:04 +08:00
|
|
|
|
2008-10-23 13:26:29 +08:00
|
|
|
#endif /* _ASM_X86_TIMER_H */
|