2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-11-20 00:26:39 +08:00
linux-next/tools/perf
Arnaldo Carvalho de Melo 48fb4fdd6b perf annotate: Handle samples not at objdump output addr boundaries
Without this patch we get this for need_resched:

[root@mica ~]# perf annotate need_resched

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
         :
         :
         :      Disassembly of section .text:
         :
         :      ffffffff810095ed <need_resched>:
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095ed:       55                      push   %rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    0.00 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
         :
         :      static inline struct thread_info *current_thread_info(void)
         :      {
         :              struct thread_info *ti;
         :              ti = (void *)(percpu_read_stable(kernel_stack) +
    0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
    0.00 :      ffffffff810095fa:       00 00
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    0.00 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
    0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
         :      }
    0.00 :      ffffffff8100960b:       c9                      leaveq
    0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
    0.00 :      ffffffff8100960e:       0f 95 c0                setne  %al
    0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
         :      Disassembly of section .vsyscall_0:
         :      Disassembly of section .vsyscall_fn:
         :      Disassembly of section .vsyscall_1:
         :      Disassembly of section .vsyscall_2:
         :      Disassembly of section .init.text:
         :      Disassembly of section .altinstr_replacement:
         :      Disassembly of section .exit.text:
[root@mica ~]#

But from the 'perf report' result we know that there are hits
for need_resched on a 4-way machine mostly doing nothing, so
after adding code to show what is in each hist offset and
collapsing IP hits that fall between objdump lines we
get, for the same perf.data file:

[root@mica ~]# perf annotate -v need_resched

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
         :
         :
         :      Disassembly of section .text:
         :
         :      ffffffff810095ed <need_resched>:
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095ed:       55                      push   %rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
   52.78 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
         :
         :      static inline struct thread_info *current_thread_info(void)
         :      {
         :              struct thread_info *ti;
         :              ti = (void *)(percpu_read_stable(kernel_stack) +
    0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
    0.00 :      ffffffff810095fa:       00 00
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    9.72 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
    0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
         :      }
    0.00 :      ffffffff8100960b:       c9                      leaveq
    0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
   37.50 :      ffffffff8100960e:       0f 95 c0                setne  %al
    0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
         :      Disassembly of section .vsyscall_0:
         :      Disassembly of section .vsyscall_fn:
         :      Disassembly of section .vsyscall_1:
         :      Disassembly of section .vsyscall_2:
         :      Disassembly of section .init.text:
         :      Disassembly of section .altinstr_replacement:
         :      Disassembly of section .exit.text:
[root@mica ~]#

And now 'perf annotate -v', verbose mode, will show the hits per
precise IP, so that one can make sense of the attribution to
each objdump line:

[root@mica ~]# perf annotate -v need_resched
Looking at the vmlinux_path (5 entries long)
Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols
annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
                ffffffff810095f1: 152
                ffffffff81009603: 28
                ffffffff8100960f: 55
                ffffffff81009610: 53
                          h->sum: 288
<SNIP same annotation>

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 15:42:49 +01:00
..
bench perf sched: Fix build failure on sparc 2009-12-14 08:59:12 +01:00
Documentation perf/scripts: Tag syscall_name helper as not yet available 2010-02-25 04:07:50 +01:00
scripts perf/scripts: Add syscall tracing scripts 2010-02-25 04:07:48 +01:00
util perf annotate: Defer allocating sym_priv->hist array 2010-02-25 17:39:14 +01:00
.gitignore perf: Ignore perf-archive temp file 2010-01-29 10:37:33 +01:00
builtin-annotate.c perf annotate: Handle samples not at objdump output addr boundaries 2010-02-26 15:42:49 +01:00
builtin-bench.c perf bench: Add "all" pseudo subsystem and "all" pseudo suite 2009-12-14 08:51:19 +01:00
builtin-buildid-cache.c perf buildid-cache: Add new command to manage build-id cache 2010-01-21 08:31:29 +01:00
builtin-buildid-list.c perf build-id: Move the routine to find DSOs with hits to the lib 2010-02-04 09:33:26 +01:00
builtin-diff.c perf tools: Don't cast RIP to pointers 2010-01-16 10:58:45 +01:00
builtin-help.c perf: Make cmd_to_page() function more compact 2010-01-13 10:53:51 +01:00
builtin-kmem.c perf symbols: Remove perf_session usage in symbols layer 2010-02-04 09:33:24 +01:00
builtin-list.c perf list: Fix large list output by using the pager 2009-08-13 09:05:48 +02:00
builtin-lock.c perf lock: Clean up various details 2010-01-31 09:08:27 +01:00
builtin-probe.c perf probe: Don't use a perf_session instance just to resolve symbols 2010-02-04 09:33:26 +01:00
builtin-record.c perf record: Fix existing process callgraph symbol 2010-02-08 16:55:52 +01:00
builtin-report.c Merge branch 'perf/urgent' into perf/core 2010-01-29 10:36:22 +01:00
builtin-sched.c perf tools: Don't cast RIP to pointers 2010-01-16 10:58:45 +01:00
builtin-stat.c perf tools: Fix --pid option for stat 2010-01-13 10:09:08 +01:00
builtin-timechart.c Merge branch 'perf/urgent' into perf/core 2010-01-29 10:36:22 +01:00
builtin-top.c perf top: Use a macro instead of a constant variable 2010-02-25 12:26:16 +01:00
builtin-trace.c perf/scripts: Add Python scripting engine 2010-02-25 04:07:29 +01:00
builtin.h perf lock: Introduce new tool "perf lock", for analyzing lock statistics 2010-01-31 09:08:26 +01:00
command-list.txt perf buildid-cache: Add new command to manage build-id cache 2010-01-21 08:31:29 +01:00
CREDITS perf_counter tools: Add CREDITS file for Git contributors 2009-06-24 19:54:29 +02:00
design.txt perf: Fix few typos + cosmetics 2010-01-13 17:39:44 +01:00
Makefile perf/scripts: Add syscall tracing scripts 2010-02-25 04:07:48 +01:00
perf-archive.sh perf archive: Add helper script to package files needed to do analysis 2010-01-16 10:58:49 +01:00
perf.c perf lock: Introduce new tool "perf lock", for analyzing lock statistics 2010-01-31 09:08:26 +01:00
perf.h perf tools: Allow building for ARM 2009-12-11 13:50:21 +01:00