perf script: Enable printing of branch stack

This patch improves perf script by enabling printing of the
branch stack via the 'brstack' and 'brstacksym' arguments to
the field selection option -F. The option is off by default
and operates only if the perf.data file has branch stack content.

The branches are printed in to/from pairs. The most recent branch
is printed first. The number of branch entries vary based on the
underlying hardware and filtering used.

The brstack prints FROM/TO addresses in raw hexadecimal format.
The brstacksym prints FROM/TO addresses in symbolic form wherever
possible.

 $ perf script -F ip,brstack
  5d3000 0x401aa0/0x5d2000/M/-/-/-/0 ...

 $ perf script -F ip,brstacksym
  4011e0 noploop+0x0/noploop+0x0/P/-/-/0

The notation F/T/M/X/A/C describes the attributes of the branch.
F=from, T=to, M/P=misprediction/prediction, X=TSX, A=TSX abort, C=cycles (SKL)

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yuanfang Chen <cyfmxc@gmail.com>
Link: http://lkml.kernel.org/r/1441039273-16260-5-git-send-email-eranian@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Stephane Eranian 2015-08-31 18:41:13 +02:00 committed by Arnaldo Carvalho de Melo
parent 729a78417a
commit dc323ce8e7
2 changed files with 93 additions and 3 deletions

View File

@ -112,11 +112,11 @@ OPTIONS
--debug-mode:: --debug-mode::
Do various checks like samples ordering and lost events. Do various checks like samples ordering and lost events.
-f:: -F::
--fields:: --fields::
Comma separated list of fields to print. Options are: Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, flags. srcline, period, iregs, brstack, brstacksym, flags.
Field list can be prepended with the type, trace, sw or hw, Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies. to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@ -175,6 +175,16 @@ OPTIONS
Finally, a user may not set fields to none for all event types. Finally, a user may not set fields to none for all event types.
i.e., -f "" is not allowed. i.e., -f "" is not allowed.
The brstack output includes branch related information with raw addresses using the
/v/v/v/v/ syntax in the following order:
FROM: branch source instruction
TO : branch target instruction
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
A/- : A=TSX abort entry, -=not aborted region or not supported
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
-k:: -k::
--vmlinux=<file>:: --vmlinux=<file>::
vmlinux pathname vmlinux pathname

View File

@ -51,6 +51,8 @@ enum perf_output_field {
PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13, PERF_OUTPUT_PERIOD = 1U << 13,
PERF_OUTPUT_IREGS = 1U << 14, PERF_OUTPUT_IREGS = 1U << 14,
PERF_OUTPUT_BRSTACK = 1U << 15,
PERF_OUTPUT_BRSTACKSYM = 1U << 16,
}; };
struct output_option { struct output_option {
@ -72,6 +74,8 @@ struct output_option {
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD}, {.str = "period", .field = PERF_OUTPUT_PERIOD},
{.str = "iregs", .field = PERF_OUTPUT_IREGS}, {.str = "iregs", .field = PERF_OUTPUT_IREGS},
{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
}; };
/* default set to maintain compatibility with current format */ /* default set to maintain compatibility with current format */
@ -425,6 +429,77 @@ static void print_sample_start(struct perf_sample *sample,
} }
} }
static inline char
mispred_str(struct branch_entry *br)
{
if (!(br->flags.mispred || br->flags.predicted))
return '-';
return br->flags.predicted ? 'P' : 'M';
}
static void print_sample_brstack(union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct thread *thread __maybe_unused,
struct perf_event_attr *attr __maybe_unused)
{
struct branch_stack *br = sample->branch_stack;
u64 i;
if (!(br && br->nr))
return;
for (i = 0; i < br->nr; i++) {
printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
br->entries[i].from,
br->entries[i].to,
mispred_str( br->entries + i),
br->entries[i].flags.in_tx? 'X' : '-',
br->entries[i].flags.abort? 'A' : '-',
br->entries[i].flags.cycles);
}
}
static void print_sample_brstacksym(union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct thread *thread __maybe_unused,
struct perf_event_attr *attr __maybe_unused)
{
struct branch_stack *br = sample->branch_stack;
struct addr_location alf, alt;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
u64 i, from, to;
if (!(br && br->nr))
return;
for (i = 0; i < br->nr; i++) {
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
from = br->entries[i].from;
to = br->entries[i].to;
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
if (alf.map)
alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
if (alt.map)
alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
putchar('/');
symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
printf("/%c/%c/%c/%d ",
mispred_str( br->entries + i),
br->entries[i].flags.in_tx? 'X' : '-',
br->entries[i].flags.abort? 'A' : '-',
br->entries[i].flags.cycles);
}
}
static void print_sample_addr(union perf_event *event, static void print_sample_addr(union perf_event *event,
struct perf_sample *sample, struct perf_sample *sample,
struct thread *thread, struct thread *thread,
@ -560,6 +635,11 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
if (PRINT_FIELD(IREGS)) if (PRINT_FIELD(IREGS))
print_sample_iregs(event, sample, thread, attr); print_sample_iregs(event, sample, thread, attr);
if (PRINT_FIELD(BRSTACK))
print_sample_brstack(event, sample, thread, attr);
else if (PRINT_FIELD(BRSTACKSYM))
print_sample_brstacksym(event, sample, thread, attr);
printf("\n"); printf("\n");
} }
@ -1681,7 +1761,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. " "comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. " "Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,flags", parse_output_fields), "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide, OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"), "system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",