x86, tracing, perf: Add trace point for MSR accesses

For debugging low level code interacting with the CPU it is often
useful to trace the MSR read/writes. This gives a concise summary of
PMU and other operations.

perf has an ad-hoc way to do this using trace_printk, but it's
somewhat limited (and also now spews ugly boot messages when enabled)

Instead define real trace points for all MSR accesses.

This adds three new trace points: read_msr and write_msr and rdpmc.

They also report if the access faulted (if *_safe is used)

This allows filtering and triggering on specific MSR values, which
allows various more advanced debugging techniques.

All the values are well defined in the CPU documentation.

The trace can be post processed with
Documentation/trace/postprocess/decode_msr.py to add symbolic MSR
names to the trace.

I only added it to native MSR accesses in C, not paravirtualized or in
entry*.S (which is not too interesting)

Originally the patch kit moved the MSRs out of line.  This uses an
alternative approach recommended by Steven Rostedt of only moving the
trace calls out of line, but open coding the access to the jump label.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1449018060-1742-3-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Andi Kleen 2015-12-01 17:00:59 -08:00 committed by Ingo Molnar
parent bd2a634d9e
commit 7f47d8cc03
5 changed files with 188 additions and 0 deletions

View File

@ -0,0 +1,37 @@
The x86 kernel supports tracing most MSR (Model Specific Register) accesses.
To see the definition of the MSRs on Intel systems please see the SDM
at http://www.intel.com/sdm (Volume 3)
Available trace points:
/sys/kernel/debug/tracing/events/msr/
Trace MSR reads
read_msr
msr: MSR number
val: Value read
failed: 1 if the access failed, otherwise 0
Trace MSR writes
write_msr
msr: MSR number
val: Value written
failed: 1 if the access failed, otherwise 0
Trace RDPMC in kernel
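rdpmc
msr: Performance counter number passed to RDPMC
val: Value read
failed: Always 0 (no fault status is reported for RDPMC)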
The trace data can be post processed with the postprocess/decode_msr.py script
cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/arch/x86/include/asm/msr-index.h
to add symbolic MSR names.

View File

@ -0,0 +1,37 @@
#!/usr/bin/python
# add symbolic names to read_msr / write_msr in trace
# decode_msr msr-index.h < trace
#
# Reads trace text on stdin and copies it to stdout. On lines containing a
# read_msr/write_msr event, the hexadecimal MSR number that follows the event
# name is rewritten as NAME(number). Names come from the "#define MSR_..."
# lines of the header given as the first argument (default: ./msr-index.h),
# plus a few numbered ranges listed in extra_ranges.
import sys
import re

# "#define MSR_<NAME> 0x<hex>" at the start of a header line.
DEFINE_RE = re.compile(r'#define (MSR_\w+)\s+(0x[0-9a-fA-F]+)')

# Event name followed by the MSR number; group 2 is the number.
EVENT_RE = re.compile(r'(read|write)_msr:\s+([0-9a-f]+)')

# (name format, first MSR, last MSR): the format receives the offset of the
# MSR from the start of its range.
extra_ranges = (
    ( "MSR_LASTBRANCH_%d_FROM_IP", 0x680, 0x69F ),
    ( "MSR_LASTBRANCH_%d_TO_IP", 0x6C0, 0x6DF ),
    ( "LBR_INFO_%d", 0xdc0, 0xddf ),
)


def load_msr_names(path):
    """Return a dict mapping MSR number -> name parsed from the header at path.

    When several defines share a number, the last one in the file wins.
    """
    names = dict()
    with open(path, "r") as f:
        for line in f:
            m = DEFINE_RE.match(line)
            if m:
                names[int(m.group(2), 16)] = m.group(1)
    return names


def lookup_msr(num, msrs):
    """Return the symbolic name for MSR number num, or None if unknown.

    Exact entries in msrs take precedence over extra_ranges.
    """
    if num in msrs:
        return msrs[num]
    for fmt, first, last in extra_ranges:
        if first <= num <= last:
            return fmt % (num - first,)
    return None


def decode_line(line, msrs):
    """Return line with the traced MSR number annotated as NAME(number).

    Lines without a read_msr/write_msr event, or whose MSR has no known
    name, are returned unchanged.
    """
    m = EVENT_RE.search(line)
    if not m:
        return line
    name = lookup_msr(int(m.group(2), 16), msrs)
    if not name:
        return line
    # Substitute at the matched position only, so an identical hex string
    # elsewhere on the line (e.g. the traced value) is left alone.
    return line[:m.start(2)] + name + "(" + m.group(2) + ")" + line[m.end(2):]


def main():
    msrs = load_msr_names(sys.argv[1] if len(sys.argv) > 1 else "msr-index.h")
    for line in sys.stdin:
        # Lines keep their own newline; write() works on Python 2 and 3.
        sys.stdout.write(decode_line(line, msrs))


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,57 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM msr
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE msr-trace
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH asm/
#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MSR_H
#include <linux/tracepoint.h>
/*
* Tracing for x86 model specific registers. Directly maps to the
* RDMSR/WRMSR instructions.
*
* msr_trace_class is the record layout shared by the events defined from
* it. Each record stores:
*   msr    - the register number handed to the event
*   val    - the 64-bit value handed to the event
*   failed - non-zero when the caller reports that the access failed
*
* The text form is "<msr in hex>, value <val in hex>", with " #GP"
* appended when failed is non-zero.
*/
DECLARE_EVENT_CLASS(msr_trace_class,
TP_PROTO(unsigned msr, u64 val, int failed),
TP_ARGS(msr, val, failed),
TP_STRUCT__entry(
__field( unsigned, msr )
__field( u64, val )
__field( int, failed )
),
TP_fast_assign(
__entry->msr = msr;
__entry->val = val;
__entry->failed = failed;
),
TP_printk("%x, value %llx%s",
__entry->msr,
__entry->val,
__entry->failed ? " #GP" : "")
);
/* read_msr: an MSR read; val is the value that was read. */
DEFINE_EVENT(msr_trace_class, read_msr,
TP_PROTO(unsigned msr, u64 val, int failed),
TP_ARGS(msr, val, failed)
);
/* write_msr: an MSR write; val is the value that was written. */
DEFINE_EVENT(msr_trace_class, write_msr,
TP_PROTO(unsigned msr, u64 val, int failed),
TP_ARGS(msr, val, failed)
);
/*
* rdpmc: a performance counter read. The event reuses the MSR record
* layout, so the "msr" field carries the counter number here.
*/
DEFINE_EVENT(msr_trace_class, rdpmc,
TP_PROTO(unsigned msr, u64 val, int failed),
TP_ARGS(msr, val, failed)
);
#endif /* _TRACE_MSR_H */
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -57,11 +57,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
#ifdef CONFIG_TRACEPOINTS
/*
* Be very careful with includes. This header is prone to include loops.
*/
#include <asm/atomic.h>
#include <linux/tracepoint-defs.h>
/* Tracepoint objects for the msr events; only declared here, not defined. */
extern struct tracepoint __tracepoint_read_msr;
extern struct tracepoint __tracepoint_write_msr;
extern struct tracepoint __tracepoint_rdpmc;
/*
* Open-coded check of a tracepoint's static key. The inline accessors in
* this header test it first and call the do_trace_*() helpers only when it
* evaluates true.
*/
#define msr_tracepoint_active(t) static_key_false(&(t).key)
/* Out-of-line helpers that issue the actual trace events. */
extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
#else
/*
* Without CONFIG_TRACEPOINTS the check is the constant false and the
* helpers are empty inline stubs, so callers need no #ifdef of their own.
*/
#define msr_tracepoint_active(t) false
static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
#endif
/*
 * Read the model specific register @msr with RDMSR and return its value.
 * The read is reported to the read_msr tracepoint (with failed == 0) when
 * that tracepoint's key is active.
 */
static inline unsigned long long native_read_msr(unsigned int msr)
{
	DECLARE_ARGS(val, low, high);
	unsigned long long result;

	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));

	/* Combine the register output once and reuse it for trace and return. */
	result = EAX_EDX_VAL(val, low, high);
	if (msr_tracepoint_active(__tracepoint_read_msr))
		do_trace_read_msr(msr, result, 0);

	return result;
}
@ -78,6 +101,8 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), [fault] "i" (-EIO));
/*
* Report the read to the read_msr tracepoint when it is active; *err is
* forwarded as the event's "failed" argument.
*/
if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
return EAX_EDX_VAL(val, low, high);
}
@ -85,6 +110,8 @@ static inline void native_write_msr(unsigned int msr,
unsigned low, unsigned high)
{
	/*
	 * Write the 64-bit value high:low to the model specific register
	 * @msr with WRMSR, then report the write to the write_msr tracepoint
	 * (with failed == 0).
	 */
	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
	/*
	 * Gate on the write_msr tracepoint's own key. Testing the read_msr
	 * key here would make write tracing follow the enable state of the
	 * read event instead of the write event.
	 */
	if (msr_tracepoint_active(__tracepoint_write_msr))
		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
/* Can be uninlined because referenced by paravirt */
@ -102,6 +129,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
		     : "c" (msr), "0" (low), "d" (high),
		       [fault] "i" (-EIO)
		     : "memory");
	/*
	 * Report the write to the write_msr tracepoint, forwarding err as the
	 * event's "failed" argument. The check must use the write_msr key:
	 * testing the read_msr key would tie write tracing to whether the
	 * read event is enabled.
	 */
	if (msr_tracepoint_active(__tracepoint_write_msr))
		do_trace_write_msr(msr, ((u64)high << 32 | low), err);
	return err;
}
@ -160,6 +189,8 @@ static inline unsigned long long native_read_pmc(int counter)
DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
/*
* Report the read to the rdpmc tracepoint when it is active. The counter
* number takes the place of the MSR number and failed is always 0.
*/
if (msr_tracepoint_active(__tracepoint_rdpmc))
do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}

View File

@ -1,6 +1,8 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <asm/msr.h>
#define CREATE_TRACE_POINTS
#include <asm/msr-trace.h>
struct msr *msrs_alloc(void)
{
@ -108,3 +110,27 @@ int msr_clear_bit(u32 msr, u8 bit)
{
/*
* Delegates to __flip_bit() with false as the last argument and returns
* its result unchanged. __flip_bit() is defined outside this view; see it
* for the meaning of the return value.
*/
return __flip_bit(msr, bit, false);
}
#ifdef CONFIG_TRACEPOINTS
/*
* Out-of-line wrappers around the msr trace events. Each one forwards its
* arguments unchanged to the matching trace_*() call. Both the wrapper and
* the tracepoint itself are exported.
*/

/* Emit a write_msr event for @msr with the written value @val. */
void do_trace_write_msr(unsigned msr, u64 val, int failed)
{
trace_write_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_write_msr);
EXPORT_TRACEPOINT_SYMBOL(write_msr);
/* Emit a read_msr event for @msr with the value @val that was read. */
void do_trace_read_msr(unsigned msr, u64 val, int failed)
{
trace_read_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_read_msr);
EXPORT_TRACEPOINT_SYMBOL(read_msr);
/* Emit an rdpmc event for performance counter @counter with value @val. */
void do_trace_rdpmc(unsigned counter, u64 val, int failed)
{
trace_rdpmc(counter, val, failed);
}
EXPORT_SYMBOL(do_trace_rdpmc);
EXPORT_TRACEPOINT_SYMBOL(rdpmc);
#endif