From d2c11034406733374d1cdc588c53bb076d95a4e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 May 2016 10:09:33 -0300 Subject: [PATCH 01/17] perf machine: Introduce number of threads member To be used, for instance, for pre-allocating an rb_tree array for sorting by other keys besides the current pid one. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ja0ifkwue7ttjhbwijn6g6eu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 7 ++++++- tools/perf/util/machine.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2cb95bbf9ea6..9d0913107dc7 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->threads = RB_ROOT; pthread_rwlock_init(&machine->threads_lock, NULL); + machine->nr_threads = 0; INIT_LIST_HEAD(&machine->dead_threads); machine->last_match = NULL; @@ -430,6 +431,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, */ thread__get(th); machine->last_match = th; + ++machine->nr_threads; } return th; @@ -681,11 +683,13 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret = 0; + size_t ret; struct rb_node *nd; pthread_rwlock_rdlock(&machine->threads_lock); + ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); @@ -1419,6 +1423,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); RB_CLEAR_NODE(&th->rb_node); + --machine->nr_threads; /* * Move it first to the 
dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 4822de5e4544..83f46790c52f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -31,6 +31,7 @@ struct machine { char *root_dir; struct rb_root threads; pthread_rwlock_t threads_lock; + unsigned int nr_threads; struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; From f58c253564815db541e93b5411c2b47dbcb2f0ea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 May 2016 11:44:28 -0300 Subject: [PATCH 02/17] perf tools: Add template for generating rbtree resort class Sometimes we want to sort an existing rbtree by a different key, introduce a template for that, that needs only to be provided the rbtree root and the number of entries in it. To do that a new rbtree will be created with extra space for each entry, where possibly pre-calculated keys will be stored to be used in the resort process and also later, when using the newly sorted rbtree. Please check the following two changesets to see it in use for resorting stats for threads and its syscalls in 'perf trace --summary'. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9l6e1q34lmf3wwdeewstyakg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/rb_resort.h | 149 ++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 tools/perf/util/rb_resort.h diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h new file mode 100644 index 000000000000..abc76e3d3098 --- /dev/null +++ b/tools/perf/util/rb_resort.h @@ -0,0 +1,149 @@ +#ifndef _PERF_RESORT_RB_H_ +#define _PERF_RESORT_RB_H_ +/* + * Template for creating a class to resort an existing rb_tree according to + * a new sort criterion, which must be present in the entries of the source + * rb_tree. + * + * (c) 2016 Arnaldo Carvalho de Melo + * + * Quick example, resorting threads by their shortname: + * + * First define the prefix (threads) to be used for the functions and data + * structures created, and provide an expression for the sorting, then the + * fields to be present in each of the entries in the new, sorted, rb_tree. 
+ * + * The body of the init function should collect the fields, maybe + * pre-calculating them from multiple entries in the original 'entry' from + * the rb_tree used as a source for the entries to be sorted: + +DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname, + b->thread->shortname) < 0, + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); +} + + * After this it is just a matter of instantiating it and iterating it, + * for a few data structures with existing rb_trees, such as 'struct machine', + * helpers are available to get the rb_root and the nr_entries: + + DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); + + * This will instantiate the new rb_tree and a cursor for it, that can be used as: + + struct rb_node *nd; + + resort_rb__for_each(nd, threads) { + struct thread *t = threads_entry->thread; + printf("%s: %d\n", t->shortname, t->tid); + } + + * Then delete it: + + resort_rb__delete(threads); + + * The name of the data structures and functions will have a _sorted suffix + * right before the method names, i.e. will look like: + * + * struct threads_sorted_entry {} + * threads_sorted__insert() + */ + +#define DEFINE_RESORT_RB(__name, __comp, ...) 
\ +struct __name##_sorted_entry { \ + struct rb_node rb_node; \ + __VA_ARGS__ \ +}; \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry); \ + \ +static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ +{ \ + struct __name##_sorted_entry *a, *b; \ + a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ + b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ + return __comp; \ +} \ + \ +struct __name##_sorted { \ + struct rb_root entries; \ + struct __name##_sorted_entry nd[0]; \ +}; \ + \ +static void __name##_sorted__insert(struct __name##_sorted *sorted, \ + struct rb_node *sorted_nd) \ +{ \ + struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ + while (*p != NULL) { \ + parent = *p; \ + if (__name##_sorted__cmp(sorted_nd, parent)) \ + p = &(*p)->rb_left; \ + else \ + p = &(*p)->rb_right; \ + } \ + rb_link_node(sorted_nd, parent, p); \ + rb_insert_color(sorted_nd, &sorted->entries); \ +} \ + \ +static void __name##_sorted__sort(struct __name##_sorted *sorted, \ + struct rb_root *entries) \ +{ \ + struct rb_node *nd; \ + unsigned int i = 0; \ + for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ + struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ + __name##_sorted__init_entry(nd, snd); \ + __name##_sorted__insert(sorted, &snd->rb_node); \ + } \ +} \ + \ +static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ + int nr_entries) \ +{ \ + struct __name##_sorted *sorted; \ + sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ + if (sorted) { \ + sorted->entries = RB_ROOT; \ + __name##_sorted__sort(sorted, entries); \ + } \ + return sorted; \ +} \ + \ +static void __name##_sorted__delete(struct __name##_sorted *sorted) \ +{ \ + free(sorted); \ +} \ + \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry) + +#define DECLARE_RESORT_RB(__name) \ +struct __name##_sorted_entry 
*__name##_entry; \ +struct __name##_sorted *__name = __name##_sorted__new + +#define resort_rb__for_each(__nd, __name) \ + for (__nd = rb_first(&__name->entries); \ + __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ + rb_node), __nd; \ + __nd = rb_next(__nd)) + +#define resort_rb__delete(__name) \ + __name##_sorted__delete(__name), __name = NULL + +/* + * Helpers for other classes that contains both an rbtree and the + * number of entries in it: + */ + +/* For 'struct intlist' */ +#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ + DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ + __ilist->rblist.nr_entries) + +/* For 'struct machine->threads' */ +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ + DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) + +#endif /* _PERF_RESORT_RB_H_ */ From 96c144512263906cc41a25524fa114c90acd3a01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 May 2016 12:47:16 -0300 Subject: [PATCH 03/17] perf trace: Sort summary output by number of events # trace -a -s sleep 1 |& grep events | tail gmain (1733), 34 events, 1.0%, 0.000 msec hexchat (9765), 46 events, 1.4%, 0.000 msec ssh (11109), 80 events, 2.4%, 0.000 msec sleep (32631), 81 events, 2.4%, 0.000 msec qemu-system-x86 (10021), 272 events, 8.2%, 0.000 msec Xorg (1965), 322 events, 9.7%, 0.000 msec SoftwareVsyncTh (10922), 366 events, 11.1%, 0.000 msec gnome-shell (2231), 446 events, 13.5%, 0.000 msec qemu-system-x86 (9931), 468 events, 14.1%, 0.000 msec firefox (10871), 1098 events, 33.2%, 0.000 msec [root@jouet ~]# Suggested-by: Milian Wolff Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ye4cnprhfeiq32ar4lt60dqs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 49 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-trace.c 
b/tools/perf/builtin-trace.c index 9e38fe973f0c..aac0074cc926 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -36,6 +36,7 @@ #include "util/bpf-loader.h" #include "callchain.h" #include "syscalltbl.h" +#include "rb_resort.h" #include /* FIXME: Still needed for audit_errno_to_name */ #include @@ -2829,19 +2830,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, return printed; } -/* struct used to pass data to per-thread function */ -struct summary_data { - FILE *fp; - struct trace *trace; - size_t printed; -}; - -static int trace__fprintf_one_thread(struct thread *thread, void *priv) +static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) { - struct summary_data *data = priv; - FILE *fp = data->fp; - size_t printed = data->printed; - struct trace *trace = data->trace; + size_t printed = 0; struct thread_trace *ttrace = thread__priv(thread); double ratio; @@ -2860,22 +2851,38 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); printed += thread__dump_stats(ttrace, trace, fp); - data->printed += printed; + return printed; +} - return 0; +static unsigned long thread__nr_events(struct thread_trace *ttrace) +{ + return ttrace ? 
ttrace->nr_events : 0; +} + +DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - struct summary_data data = { - .fp = fp, - .trace = trace - }; - data.printed = trace__fprintf_threads_header(fp); + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); + size_t printed = trace__fprintf_threads_header(fp); + struct rb_node *nd; - machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - return data.printed; + resort_rb__for_each(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + + resort_rb__delete(threads); + + return printed; } static int trace__set_duration(const struct option *opt, const char *str, From b535d523dce58e8e94f7dbf741f1e9b5479e61d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 May 2016 16:06:26 -0300 Subject: [PATCH 04/17] perf trace: Sort syscalls stats by msecs in --summary # trace -a -s sleep 1 Xorg (1965), 788 events, 19.0%, 0.000 msec syscall calls total min avg max stddev (msec) (msec) (msec) (msec) (%) --------------- -------- --------- --------- --------- --------- ------ select 89 731.038 0.000 8.214 175.218 36.71% ioctl 22 0.661 0.010 0.030 0.072 10.43% writev 42 0.253 0.002 0.006 0.011 5.94% recvmsg 60 0.185 0.001 0.003 0.009 5.90% setitimer 60 0.127 0.001 0.002 0.006 6.14% read 52 0.102 0.001 0.002 0.005 8.55% rt_sigprocmask 45 0.092 0.001 0.002 0.023 23.65% poll 12 0.021 0.001 0.002 0.003 7.21% epoll_wait 12 0.019 0.001 0.002 0.002 2.71% firefox (10871), 1080 events, 26.1%, 0.000 msec syscall calls total min avg max stddev (msec) (msec) (msec) (msec) (%) --------------- -------- --------- --------- --------- --------- ------ poll 240 979.562 0.000 
4.082 17.132 11.33% recvmsg 240 0.532 0.001 0.002 0.007 3.69% read 60 0.303 0.003 0.005 0.029 8.50% Suggested-by: Milian Wolff Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-52kdkuyxihq0kvc0n2aalhay@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index aac0074cc926..c61e61240b3b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2785,15 +2785,29 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } +DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, + struct stats *stats; + double msecs; + int syscall; +) +{ + struct int_node *source = rb_entry(nd, struct int_node, rb_node); + struct stats *stats = source->priv; + + entry->syscall = source->i; + entry->stats = stats; + entry->msecs = stats ? 
(u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; +} + static size_t thread__dump_stats(struct thread_trace *ttrace, struct trace *trace, FILE *fp) { - struct stats *stats; size_t printed = 0; struct syscall *sc; - struct int_node *inode = intlist__first(ttrace->syscall_stats); + struct rb_node *nd; + DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); - if (inode == NULL) + if (syscall_stats == NULL) return 0; printed += fprintf(fp, "\n"); @@ -2802,9 +2816,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); - /* each int_node is a syscall */ - while (inode) { - stats = inode->priv; + resort_rb__for_each(nd, syscall_stats) { + struct stats *stats = syscall_stats_entry->stats; if (stats) { double min = (double)(stats->min) / NSEC_PER_MSEC; double max = (double)(stats->max) / NSEC_PER_MSEC; @@ -2815,16 +2828,15 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, pct = avg ? 
100.0 * stddev_stats(stats)/avg : 0.0; avg /= NSEC_PER_MSEC; - sc = &trace->syscalls.table[inode->i]; + sc = &trace->syscalls.table[syscall_stats_entry->syscall]; printed += fprintf(fp, " %-15s", sc->name); printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", - n, avg * n, min, avg); + n, syscall_stats_entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } - - inode = intlist__next(inode); } + resort_rb__delete(syscall_stats); printed += fprintf(fp, "\n\n"); return printed; From 03548ebf6d8cc8a3a782121cf3e54ea41230e227 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 May 2016 15:46:50 -0300 Subject: [PATCH 05/17] perf trace: Do not show the runtime_ms for a thread when not collecting it That field is only updated when we use the "sched:sched_stat_runtime" tracepoint, and that is only done so far when we use the '--stat' command line option, without it we get just zeros, confusing the users: Without this patch: # trace -a -s sleep 1 qemu-system-x86 (9931), 468 events, 9.6%, 0.000 msec syscall calls total min avg max stddev (msec) (msec) (msec) (msec) (%) ---------- ------ --------- --------- --------- --------- ------ ppoll 98 982.374 0.000 10.024 29.983 12.65% write 34 0.401 0.005 0.012 0.027 5.49% ioctl 102 0.347 0.002 0.003 0.007 3.08% firefox (10871), 1856 events, 38.2%, 0.000 msec (msec) (msec) (msec) (msec) (%) ---------- ------ --------- --------- --------- --------- ------ poll 395 934.873 0.000 2.367 17.120 11.51% recvmsg 395 0.988 0.001 0.003 0.021 4.20% read 106 0.460 0.002 0.004 0.007 3.17% futex 24 0.108 0.001 0.004 0.010 10.05% mmap 2 0.041 0.016 0.021 0.026 23.92% write 6 0.027 0.004 0.004 0.005 2.52% After this patch that ', 0.000 msecs' gets suppressed when --stat is not in use. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-p7emqrsw7900tdkg43v9l1e1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c61e61240b3b..66aa2a00414b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2860,7 +2860,11 @@ static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trac printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); if (ttrace->pfmin) printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); - printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + if (trace->sched) + printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + else if (fputc('\n', fp) != EOF) + ++printed; + printed += thread__dump_stats(ttrace, trace, fp); return printed; From 4679bccaa30893ccc5be35c5c5d44f5ab60c0a08 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 5 May 2016 20:53:19 +0530 Subject: [PATCH 06/17] perf tools powerpc: Add support for generating bpf prologue Generalize existing macros to serve the purpose. Signed-off-by: Naveen N. 
Rao Cc: Ian Munsie Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Wang Nan Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1462461799-17518-1-git-send-email-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/Makefile | 1 + tools/perf/arch/powerpc/util/dwarf-regs.c | 40 ++++++++++++++++------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 56e05f126ad8..cc3930904d68 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 733151cdf46e..41bdf9530d82 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -10,19 +10,26 @@ */ #include +#include +#include #include - +#include +#include +#include "util.h" struct pt_regs_dwarfnum { const char *name; unsigned int dwarfnum; + unsigned int ptregs_offset; }; -#define STR(s) #s -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%gpr##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} +#define REG_DWARFNUM_NAME(r, num) \ + {.name = STR(%)STR(r), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, r)} +#define GPR_DWARFNUM_NAME(num) \ + {.name = STR(%gpr##num), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, gpr[num])} +#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} /* * Reference: @@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = { GPR_DWARFNUM_NAME(29), GPR_DWARFNUM_NAME(30), GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME("%msr", 66), - REG_DWARFNUM_NAME("%ctr", 109), - 
REG_DWARFNUM_NAME("%link", 108), - REG_DWARFNUM_NAME("%xer", 101), - REG_DWARFNUM_NAME("%dar", 119), - REG_DWARFNUM_NAME("%dsisr", 118), + REG_DWARFNUM_NAME(msr, 66), + REG_DWARFNUM_NAME(ctr, 109), + REG_DWARFNUM_NAME(link, 108), + REG_DWARFNUM_NAME(xer, 101), + REG_DWARFNUM_NAME(dar, 119), + REG_DWARFNUM_NAME(dsisr, 118), REG_DWARFNUM_END, }; @@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n) return roff->name; return NULL; } + +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_dwarfnum *roff; + for (roff = regdwarfnum_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->ptregs_offset; + return -EINVAL; +} From 52225036fa8f5aca4c1b7b4f12742f72a1bf9d73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:42 +0200 Subject: [PATCH 07/17] perf hists: Move sort__need_collapse into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__need_collapse into struct perf_hpp_list. Adding hists__has macro to easily access this info perf struct hists object. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 ++-- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/tests/hists_common.c | 2 +- tools/perf/tests/hists_cumulate.c | 2 +- tools/perf/tests/hists_link.c | 4 ++-- tools/perf/tests/hists_output.c | 2 +- tools/perf/util/hist.c | 14 +++++++------- tools/perf/util/hist.h | 4 ++++ tools/perf/util/sort.c | 5 ++--- tools/perf/util/sort.h | 1 - 11 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8053a8ceefda..9ce354f469dc 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8d9b88af901d..394d05ec0014 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -936,7 +936,7 @@ repeat: goto error; } - sort__need_collapse = true; + perf_hpp_list.need_collapse = true; } /* Force tty output for header output and per-thread stat. 
*/ diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index da18517b1d40..ff6109839cdd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1255,7 +1255,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ - sort__need_collapse = 1; + perf_hpp_list.need_collapse = 1; if (top.use_stdio) use_browser = 0; diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index f55f4bd47932..6b21746d6eec 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -161,7 +161,7 @@ void print_hists_in(struct hists *hists) struct rb_root *root; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 4a2bbff9b1ee..a9e3db3afac4 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -126,7 +126,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 358324e47805..acf5a1301c07 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -145,7 +145,7 @@ static int __validate_match(struct hists *hists) /* * Only entries from fake_common_samples should have a pair. */ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -197,7 +197,7 @@ static int __validate_link(struct hists *hists, int idx) * and some entries will have no pair. However every entry * in other hists should have (dummy) pair. 
*/ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 7cd8738e842f..63c5efaba1b5 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -92,7 +92,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0f33d7e698c4..cfab531437c7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -295,7 +295,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) root_in = &he->parent_he->hroot_in; root_out = &he->parent_he->hroot_out; } else { - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; @@ -1373,7 +1373,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) struct hist_entry *n; int ret; - if (!sort__need_collapse) + if (!hists__has(hists, need_collapse)) return 0; hists->nr_entries = 0; @@ -1632,7 +1632,7 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, return; } - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -2036,7 +2036,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -2078,7 +2078,7 @@ static struct hist_entry *hists__find_entry(struct hists *hists, { struct rb_node *n; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) n = hists->entries_collapsed.rb_node; 
else n = hists->entries_in->rb_node; @@ -2107,7 +2107,7 @@ void hists__match(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(leader, need_collapse)) root = &leader->entries_collapsed; else root = leader->entries_in; @@ -2132,7 +2132,7 @@ int hists__link(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(other, need_collapse)) root = &other->entries_collapsed; else root = other->entries_in; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 588596561cb3..ec76e6bef916 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -82,6 +82,8 @@ struct hists { int nr_hpp_node; }; +#define hists__has(__h, __f) (__h)->hpp_list->__f + struct hist_entry_iter; struct hist_iter_ops { @@ -238,6 +240,8 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; + + int need_collapse; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 47966a1618c7..64ace548dc88 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__need_collapse = 0; int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; @@ -2163,7 +2162,7 @@ static int __sort_dimension__add(struct sort_dimension *sd, return -1; if (sd->entry->se_collapse) - sort__need_collapse = 1; + list->need_collapse = 1; sd->taken = 1; @@ -2746,7 +2745,7 @@ int setup_sorting(struct perf_evlist *evlist) void reset_output_field(void) { - sort__need_collapse = 0; + perf_hpp_list.need_collapse = 0; sort__has_parent = 0; sort__has_sym = 0; sort__has_dso = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f4e35998119..2e1d27326954 100644 --- a/tools/perf/util/sort.h +++ 
b/tools/perf/util/sort.h @@ -31,7 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__need_collapse; extern int sort__has_dso; extern int sort__has_parent; extern int sort__has_sym; From de7e6a7c8bf9ee46dcbee749bc3cdd0d9c21998a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:43 +0200 Subject: [PATCH 08/17] perf hists: Move sort__has_parent into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_parent into struct perf_hpp_list. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/callchain.c | 2 +- tools/perf/util/hist.h | 1 + tools/perf/util/machine.c | 2 +- tools/perf/util/sort.c | 5 ++--- tools/perf/util/sort.h | 1 - 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 394d05ec0014..87d40e3c4078 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -234,7 +234,7 @@ static int report__setup_sample_type(struct report *rep) sample_type |= PERF_SAMPLE_BRANCH_STACK; if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { - if (sort__has_parent) { + if (perf_hpp_list.parent) { ui__error("Selected --sort parent, but no " "callchain data. 
Did you call " "'perf record' without -g?\n"); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index aa248dcb4440..07fd30bc2f81 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -799,7 +799,7 @@ int sample__resolve_callchain(struct perf_sample *sample, return 0; if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || - sort__has_parent) { + perf_hpp_list.parent) { return thread__resolve_callchain(al->thread, cursor, evsel, sample, parent, al, max_stack); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ec76e6bef916..57b09791c339 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -242,6 +242,7 @@ struct perf_hpp_list { struct list_head sorts; int need_collapse; + int parent; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9d0913107dc7..8c7bf4dbd479 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1652,7 +1652,7 @@ static int add_callchain_ip(struct thread *thread, } if (al.sym != NULL) { - if (sort__has_parent && !*parent && + if (perf_hpp_list.parent && !*parent && symbol__match_regex(al.sym, &parent_regex)) *parent = al.sym; else if (have_ignore_callees && root_al && diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 64ace548dc88..75b33d387f50 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; @@ -2244,7 +2243,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, pr_err("Invalid regex: %s\n%s", parent_pattern, err); return -EINVAL; } - sort__has_parent = 1; + list->parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; /* @@ -2746,7 +2745,7 @@ int setup_sorting(struct perf_evlist 
*evlist) void reset_output_field(void) { perf_hpp_list.need_collapse = 0; - sort__has_parent = 0; + perf_hpp_list.parent = 0; sort__has_sym = 0; sort__has_dso = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 2e1d27326954..0e44aea86800 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,7 +32,6 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__has_dso; -extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; extern int sort__has_thread; From 2e0453af4e16e97268b9e66aad37beb607ed7c0f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:44 +0200 Subject: [PATCH 09/17] perf hists: Move sort__has_sym into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_sym into struct perf_hpp_list. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 ++-- tools/perf/ui/browsers/hists.c | 6 +++--- tools/perf/ui/gtk/hists.c | 2 +- tools/perf/ui/hist.c | 2 +- tools/perf/util/annotate.c | 2 +- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 9 ++++----- tools/perf/util/sort.h | 1 - 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ff6109839cdd..39fe06fc19d3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -688,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; - if (sort__has_sym && single) + if (perf_hpp_list.sym && single) perf_top__record_precise_ip(top, he, evsel->idx, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, @@ 
-919,7 +919,7 @@ out_err: static int callchain_param__setup_sample_type(struct callchain_param *callchain) { - if (!sort__has_sym) { + if (!perf_hpp_list.sym) { if (callchain->enabled) { ui__error("Selected -g but \"sym\" not present in --sort/-s."); return -EINVAL; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 6a4681932ba5..b66bf83ed883 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2747,7 +2747,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, */ goto out_free_stack; case 'a': - if (!sort__has_sym) { + if (!hists__has(hists, sym)) { ui_browser__warning(&browser->b, delay_secs * 2, "Annotation is only available for symbolic views, " "include \"sym*\" in --sort to use it."); @@ -2910,7 +2910,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym || browser->selection == NULL) + if (!hists__has(hists, sym) || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2969,7 +2969,7 @@ skip_annotation: * * See hist_browser__show_entry. */ - if (sort__has_sym && browser->selection->sym) { + if (hists__has(hists, sym) && browser->selection->sym) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 2aa45b606fa4..932adfaa05af 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -379,7 +379,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (symbol_conf.use_callchain && sort__has_sym) { + if (symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? 
h->stat_acc->period : h->stat.period; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3baeaa6e71b5..af07ffb129ca 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -635,7 +635,7 @@ unsigned int hists__sort_list_width(struct hists *hists) ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); } - if (verbose && sort__has_sym) /* Addr + origin */ + if (verbose && hists__has(hists, sym)) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; return ret; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b795b6994144..d4b3d034c503 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1665,5 +1665,5 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) bool ui__has_annotation(void) { - return use_browser == 1 && sort__has_sym; + return use_browser == 1 && perf_hpp_list.sym; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 57b09791c339..c3a77502e22b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -243,6 +243,7 @@ struct perf_hpp_list { int need_collapse; int parent; + int sym; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 75b33d387f50..544ab376ac42 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; int sort__has_thread = 0; @@ -2245,7 +2244,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, } list->parent = 1; } else if (sd->entry == &sort_sym) { - sort__has_sym = 1; + list->sym = 1; /* * perf diff displays the performance difference amongst * two or more perf.data files. 
Those files could come @@ -2287,7 +2286,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2303,7 +2302,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return -EINVAL; if (sd->entry == &sort_mem_daddr_sym) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2746,7 +2745,7 @@ void reset_output_field(void) { perf_hpp_list.need_collapse = 0; perf_hpp_list.parent = 0; - sort__has_sym = 0; + perf_hpp_list.sym = 0; sort__has_dso = 0; field_order = NULL; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 0e44aea86800..9a5e7d4a2cac 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,7 +32,6 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__has_dso; -extern int sort__has_sym; extern int sort__has_socket; extern int sort__has_thread; extern int sort__has_comm; From 69849fc5d2119799509026df7a6fd5ffe5a578b3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:45 +0200 Subject: [PATCH 10/17] perf hists: Move sort__has_dso into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_dso into struct perf_hpp_list. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 ++++---- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 7 +++---- tools/perf/util/sort.h | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b66bf83ed883..6b2f95300e35 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2380,7 +2380,7 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (browser->hists->dso_filter) { @@ -2407,7 +2407,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -2429,10 +2429,10 @@ do_browse_map(struct hist_browser *browser __maybe_unused, } static int -add_map_opt(struct hist_browser *browser __maybe_unused, +add_map_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c3a77502e22b..4302f34f36ae 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -244,6 +244,7 @@ struct perf_hpp_list { int need_collapse; int parent; int sym; + int dso; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 544ab376ac42..2446c39b5fa6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ 
const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_dso = 0; int sort__has_socket = 0; int sort__has_thread = 0; int sort__has_comm = 0; @@ -241,7 +240,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) * comparing symbol address alone is not enough since it's a * relative address within a dso. */ - if (!sort__has_dso) { + if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) { ret = sort__dso_cmp(left, right); if (ret != 0) return ret; @@ -2255,7 +2254,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, sd->entry->se_collapse = sort__sym_sort; } else if (sd->entry == &sort_dso) { - sort__has_dso = 1; + list->dso = 1; } else if (sd->entry == &sort_socket) { sort__has_socket = 1; } else if (sd->entry == &sort_thread) { @@ -2746,7 +2745,7 @@ void reset_output_field(void) perf_hpp_list.need_collapse = 0; perf_hpp_list.parent = 0; perf_hpp_list.sym = 0; - sort__has_dso = 0; + perf_hpp_list.dso = 0; field_order = NULL; sort_order = NULL; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9a5e7d4a2cac..87d4addf92b5 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,7 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__has_dso; extern int sort__has_socket; extern int sort__has_thread; extern int sort__has_comm; From 35a634f76c02b98d31397a589544022b478c0e12 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:46 +0200 Subject: [PATCH 11/17] perf hists: Move sort__has_socket into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_socket into struct perf_hpp_list. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/hists.c | 4 ++-- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 3 +-- tools/perf/util/sort.h | 1 - 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 39fe06fc19d3..1793da585676 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -962,7 +962,7 @@ static int __cmd_top(struct perf_top *top) machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout); - if (sort__has_socket) { + if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); if (ret < 0) goto out_err_cpu_topo; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 6b2f95300e35..b25bf82c121f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2534,7 +2534,7 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { - if (!sort__has_socket || act->socket < 0) + if (!hists__has(browser->hists, socket) || act->socket < 0) return 0; if (browser->hists->socket_filter > -1) { @@ -2556,7 +2556,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (!sort__has_socket || socket_id < 0) + if (!hists__has(browser->hists, socket) || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4302f34f36ae..66f313936faf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -245,6 +245,7 @@ struct perf_hpp_list { int parent; int sym; int dso; + int socket; }; extern struct 
perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2446c39b5fa6..712a71ad76a6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_socket = 0; int sort__has_thread = 0; int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; @@ -2256,7 +2255,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, } else if (sd->entry == &sort_dso) { list->dso = 1; } else if (sd->entry == &sort_socket) { - sort__has_socket = 1; + list->socket = 1; } else if (sd->entry == &sort_thread) { sort__has_thread = 1; } else if (sd->entry == &sort_comm) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 87d4addf92b5..85424a608074 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,7 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__has_socket; extern int sort__has_thread; extern int sort__has_comm; extern enum sort_mode sort__mode; From fa82911a1bdf4f9870ffca097d7fe21128b5639c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:47 +0200 Subject: [PATCH 12/17] perf hists: Move sort__has_thread into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_thread into struct perf_hpp_list. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 12 ++++++------ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 3 +-- tools/perf/util/sort.h | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b25bf82c121f..dda5b4322945 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2135,7 +2135,7 @@ static int hists__browser_title(struct hists *hists, printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); if (thread) { - if (sort__has_thread) { + if (hists__has(hists, thread)) { printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), @@ -2320,7 +2320,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && !sort__has_comm) || thread == NULL) return 0; if (browser->hists->thread_filter) { @@ -2329,7 +2329,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", thread->comm_set ? 
thread__comm_str(thread) : "", thread->tid); @@ -2354,10 +2354,10 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, { int ret; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && !sort__has_comm) || thread == NULL) return 0; - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ret = asprintf(optstr, "Zoom %s %s(%d) thread", browser->hists->thread_filter ? "out of" : "into", thread->comm_set ? thread__comm_str(thread) : "", @@ -2954,7 +2954,7 @@ skip_annotation: goto skip_scripting; if (browser->he_selection) { - if (sort__has_thread && thread) { + if (hists__has(hists, thread) && thread) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 66f313936faf..adeb4049c488 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -246,6 +246,7 @@ struct perf_hpp_list { int sym; int dso; int socket; + int thread; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 712a71ad76a6..000d6e901841 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_thread = 0; int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; @@ -2257,7 +2256,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, } else if (sd->entry == &sort_socket) { list->socket = 1; } else if (sd->entry == &sort_thread) { - sort__has_thread = 1; + list->thread = 1; } else if (sd->entry == &sort_comm) { sort__has_comm = 1; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 85424a608074..e8d1bf147522 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,7 +31,6 @@ extern const char *parent_pattern; extern const char 
default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__has_thread; extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; From 7cecb7fe8388d5c39708d7f07e642ed31bb9d4fe Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 13:54:48 +0200 Subject: [PATCH 13/17] perf hists: Move sort__has_comm into struct perf_hpp_list Now we have sort dimensions private for struct hists, we need to make dimension booleans hists specific as well. Moving sort__has_comm into struct perf_hpp_list. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462276488-26683-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 ++++-- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 3 +-- tools/perf/util/sort.h | 1 - 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index dda5b4322945..538bae880bfe 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2320,7 +2320,8 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; - if ((!hists__has(browser->hists, thread) && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; if (browser->hists->thread_filter) { @@ -2354,7 +2355,8 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, { int ret; - if ((!hists__has(browser->hists, thread) && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; if (hists__has(browser->hists, thread)) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index adeb4049c488..0f84bfb42bb1 100644 --- a/tools/perf/util/hist.h +++ 
b/tools/perf/util/hist.h @@ -247,6 +247,7 @@ struct perf_hpp_list { int dso; int socket; int thread; + int comm; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 000d6e901841..772e2e461ec3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,7 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; /* @@ -2258,7 +2257,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, } else if (sd->entry == &sort_thread) { list->thread = 1; } else if (sd->entry == &sort_comm) { - sort__has_comm = 1; + list->comm = 1; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e8d1bf147522..42927f448bcb 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,7 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; From 239aeba764092b29dd7cab177cd47f472390622e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 12 Apr 2016 14:40:49 +0530 Subject: [PATCH 14/17] perf powerpc: Fix kprobe and kretprobe handling with kallsyms on ppc64le So far, we used to treat probe point offsets as being offset from the LEP. However, userspace applications (objdump/readelf) always show disassembly and offsets from the function GEP. This is confusing to the user as we will end up probing at an address different from what the user expects when looking at the function disassembly with readelf/objdump. Fix this by changing how we modify probe address with perf. If only the function name is provided, we assume the user needs the LEP. 
Otherwise, if an offset is specified, we assume that the user knows the exact address to probe based on function disassembly, and so we just place the probe from the GEP offset. Finally, kretprobe was also broken with kallsyms as we were trying to specify an offset. This patch also fixes that issue. Reported-by: Ananth N Mavinakayanahalli Signed-off-by: Naveen N. Rao Acked-by: Balbir Singh Cc: Mark Wielaard Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/75df860aad8216bf4b9bcd10c6351ecc0e3dee54.1460451721.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index bbc1a50768dd..6974ba0fa065 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -71,12 +71,21 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map) { /* - * ppc64 ABIv2 local entry point is currently always 2 instructions - * (8 bytes) after the global entry point. + * When probing at a function entry point, we normally always want the + * LEP since that catches calls to the function through both the GEP and + * the LEP. Hence, we would like to probe at an offset of 8 bytes if + * the user only specified the function entry. + * + * However, if the user specifies an offset, we fall back to using the + * GEP since all userspace applications (objdump/readelf) show function + * disassembly with offsets from the GEP. + * + * In addition, we shouldn't specify an offset for kretprobes. 
*/ - if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { - tev->point.address += PPC64LE_LEP_OFFSET; + if (pev->point.offset || pev->point.retprobe || !map) + return; + + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) tev->point.offset += PPC64LE_LEP_OFFSET; - } } #endif From 0b3c2264ae30ed692fd1ffd2b84c5fbdf737cb0d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 12 Apr 2016 14:40:50 +0530 Subject: [PATCH 15/17] perf symbols: Fix kallsyms perf test on ppc64le ppc64le functions have a Global Entry Point (GEP) and a Local Entry Point (LEP). While placing a probe, we always prefer the LEP since it catches function calls through both the GEP and the LEP. In order to do this, we fixup the function entry points during elf symbol table lookup to point to the LEPs. This works, but breaks 'perf test kallsyms' since the symbols loaded from the symbol table (pointing to the LEP) do not match the symbols in kallsyms. To fix this, we do not adjust all the symbols during symbol table load. Instead, we note down st_other in a newly introduced arch-specific member of perf symbol structure, and later use this to adjust the probe trace point. Reported-by: Michael Ellerman Signed-off-by: Naveen N. 
Rao Acked-by: Ananth N Mavinakayanahalli Acked-by: Balbir Singh Cc: Mark Wielaard Cc: Masami Hiramatsu Cc: Thiago Jung Bauermann Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/6be7c2b17e370100c2f79dd444509df7929bdd3e.1460451721.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 28 +++++++++++++++------ tools/perf/util/probe-event.c | 5 ++-- tools/perf/util/probe-event.h | 3 ++- tools/perf/util/symbol-elf.c | 7 +++--- tools/perf/util/symbol.h | 3 ++- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 6974ba0fa065..c6d0f91731a1 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } -#if defined(_CALL_ELF) && _CALL_ELF == 2 -void arch__elf_sym_adjust(GElf_Sym *sym) -{ - sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); -} -#endif #endif #if !defined(_CALL_ELF) || _CALL_ELF != 2 @@ -65,11 +59,21 @@ bool arch__prefers_symtab(void) return true; } +#ifdef HAVE_LIBELF_SUPPORT +void arch__sym_update(struct symbol *s, GElf_Sym *sym) +{ + s->arch_sym = sym->st_other; +} +#endif + #define PPC64LE_LEP_OFFSET 8 void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map) + struct probe_trace_event *tev, struct map *map, + struct symbol *sym) { + int lep_offset; + /* * When probing at a function entry point, we normally always want the * LEP since that catches calls to the function through both the GEP and @@ -82,10 +86,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * * In addition, we shouldn't specify an offset for kretprobes. 
*/ - if (pev->point.offset || pev->point.retprobe || !map) + if (pev->point.offset || pev->point.retprobe || !map || !sym) return; + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) tev->point.offset += PPC64LE_LEP_OFFSET; + else if (lep_offset) { + if (pev->uprobes) + tev->point.address += lep_offset; + else + tev->point.offset += lep_offset; + } } #endif diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 85d82f4dc5e9..c82c625395ab 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2477,7 +2477,8 @@ static int find_probe_functions(struct map *map, char *name, void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused, struct probe_trace_event *tev __maybe_unused, - struct map *map __maybe_unused) { } + struct map *map __maybe_unused, + struct symbol *sym __maybe_unused) { } /* * Find probe function addresses from map. @@ -2614,7 +2615,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, strdup_or_goto(pev->args[i].type, nomem_out); } - arch__fix_tev_from_maps(pev, tev, map); + arch__fix_tev_from_maps(pev, tev, map, sym); } if (ret == skipped) { ret = -ENOENT; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e2209623f981..5a27eb4fad05 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -154,7 +154,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, int show_available_funcs(const char *module, struct strfilter *filter, bool user); bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map); + struct probe_trace_event *tev, struct map *map, + struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ int e_snprintf(char *str, size_t size, const char *format, ...) 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 3f9d6798bd18..87a297dd8901 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -770,7 +770,8 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } -void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { } +void __weak arch__sym_update(struct symbol *s __maybe_unused, + GElf_Sym *sym __maybe_unused) { } int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, @@ -947,8 +948,6 @@ int dso__load_sym(struct dso *dso, struct map *map, (sym.st_value & 1)) --sym.st_value; - arch__elf_sym_adjust(&sym); - if (dso->kernel || kmodule) { char dso_name[PATH_MAX]; @@ -1082,6 +1081,8 @@ new_symbol: if (!f) goto out_elf_end; + arch__sym_update(f, &sym); + if (filter && filter(curr_map, f)) symbol__delete(f); else { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index c8e43979ed5c..07211c2f8456 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -55,6 +55,7 @@ struct symbol { u16 namelen; u8 binding; bool ignore; + u8 arch_sym; char name[0]; }; @@ -323,7 +324,7 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); -void arch__elf_sym_adjust(GElf_Sym *sym); +void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif #define SYMBOL_A 0 From 0f4ccd11813f59d766039dfdd13aa98245a67294 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 27 Apr 2016 02:19:20 +0000 Subject: [PATCH 16/17] perf evlist: Extract perf_mmap__read() Extract event reader from perf_evlist__mmap_read() to perf_mmap__read(). A future commit will feed it with manually computed 'head' and 'old' pointers.
Signed-off-by: Wang Nan Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1461723563-67451-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 85271e54a63b..96c71916e367 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -679,24 +679,15 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, return NULL; } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +static union perf_event * +perf_mmap__read(struct perf_mmap *md, bool overwrite, u64 head, + u64 old, u64 *prev) { - struct perf_mmap *md = &evlist->mmap[idx]; - u64 head; - u64 old = md->prev; - int diff; unsigned char *data = md->base + page_size; union perf_event *event = NULL; + int diff = head - old; - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!atomic_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - diff = head - old; - if (evlist->overwrite) { + if (overwrite) { /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. @@ -751,11 +742,29 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) } broken_event: - md->prev = old; + if (prev) + *prev = old; return event; } +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + struct perf_mmap *md = &evlist->mmap[idx]; + u64 head; + u64 old = md->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. 
+ */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); + + return perf_mmap__read(md, evlist->overwrite, head, old, &md->prev); +} + static bool perf_mmap__empty(struct perf_mmap *md) { return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; From b6b85dad30ad7e7394990e2317a780577974a4e6 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 27 Apr 2016 02:19:21 +0000 Subject: [PATCH 17/17] perf evlist: Rename variable in perf_mmap__read() In perf_mmap__read(), give better names to pointers. The original names 'old' and 'head' are directly related to pointers in the ring buffer control page. For a backward ring buffer, the meaning of the 'head' pointer is not 'the first byte of free space', but 'the first byte of the last record'. To reduce confusion, rename 'old' to 'start', 'head' to 'end'. 'start' -> 'end' is the direction the records should be read from. Change parameter order. Change 'overwrite' to 'check_messup'. When reading from 'head', there is no need to check for messup for a backward ring buffer.
Signed-off-by: Wang Nan Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1461723563-67451-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 96c71916e367..17cd01421e7f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -679,30 +679,31 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, return NULL; } +/* When check_messup is true, 'end' must points to a good entry */ static union perf_event * -perf_mmap__read(struct perf_mmap *md, bool overwrite, u64 head, - u64 old, u64 *prev) +perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, + u64 end, u64 *prev) { unsigned char *data = md->base + page_size; union perf_event *event = NULL; - int diff = head - old; + int diff = end - start; - if (overwrite) { + if (check_messup) { /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. * - * If we somehow ended up ahead of the head, we got messed up. + * If we somehow ended up ahead of the 'end', we got messed up. * - * In either case, truncate and restart at head. + * In either case, truncate and restart at 'end'. */ if (diff > md->mask / 2 || diff < 0) { fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* - * head points to a known good entry, start there. + * 'end' points to a known good entry, start there. 
*/ - old = head; + start = end; diff = 0; } } @@ -710,7 +711,7 @@ perf_mmap__read(struct perf_mmap *md, bool overwrite, u64 head, if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[old & md->mask]; + event = (union perf_event *)&data[start & md->mask]; size = event->header.size; if (size < sizeof(event->header) || diff < (int)size) { @@ -722,8 +723,8 @@ perf_mmap__read(struct perf_mmap *md, bool overwrite, u64 head, * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; + if ((start & md->mask) + size != ((start + size) & md->mask)) { + unsigned int offset = start; unsigned int len = min(sizeof(*event), size), cpy; void *dst = md->event_copy; @@ -738,12 +739,12 @@ perf_mmap__read(struct perf_mmap *md, bool overwrite, u64 head, event = (union perf_event *) md->event_copy; } - old += size; + start += size; } broken_event: if (prev) - *prev = old; + *prev = start; return event; } @@ -762,7 +763,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) head = perf_mmap__read_head(md); - return perf_mmap__read(md, evlist->overwrite, head, old, &md->prev); + return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev); } static bool perf_mmap__empty(struct perf_mmap *md)