mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-15 00:04:15 +08:00
cebf7d51a6
This patch prints the stddev and hist for the cycles diff of program block. It can help us to understand if the cycles is noisy or not. This patch is inspired by Andi Kleen's patch: https://lwn.net/Articles/600471/ We create new option '--cycles-hist'. Example: perf record -b ./div perf record -b ./div perf diff -c cycles # Baseline [Program Block Range] Cycles Diff Shared Object Symbol # ........ .......................................................... .... ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 div [.] main 46.72% [div.c:42 -> div.c:44] 0 div [.] main 46.72% [div.c:42 -> div.c:39] 0 div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 div [.] compute_flag 8.40% [div.c:27 -> div.c:28] 0 div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] 
rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 [kernel.kallsyms] [k] native_write_msr When we enable the option '--cycles-hist', the output is perf diff -c cycles --cycles-hist # Baseline [Program Block Range] Cycles Diff stddev/Hist Shared Object Symbol # ........ .......................................................... .... ................. ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 ± 37.8% ▁█▁▁██▁█ div [.] main 46.72% [div.c:42 -> div.c:44] 0 ± 49.4% ▁▁▂█▂▂▂▂ div [.] main 46.72% [div.c:42 -> div.c:39] 0 ± 24.1% ▃█▂▄▁▃▂▁ div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 ± 33.5% ▅▂▁█▃▁▂▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 ± 39.4% ▁▁█▁██▅▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 ± 41.2% ▁▃▁▂█▄▃▁ libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 ± 48.8% ▁▁▁▁███▁ libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 ± 75.6% ▃█▁▁▁▁▁▁ libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 ± 42.1% ▁▃▁▁███▁ div [.] 
compute_flag 8.40% [div.c:27 -> div.c:28] 0 ± 41.8% ██▁▁▄▁▁▄ div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 ± 37.8% ▁▁▁████▁ libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 ± 38.5% ▄█▁ [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 ± 47.1% ▁█▇▃▁▁ [kernel.kallsyms] [k] native_write_msr v8: --- Rebase to perf/core branch v7: --- 1. v6 got Jiri's ACK. 2. Rebase to latest perf/core branch. v6: --- 1. Jiri provides better code for using data__hpp_register() in ui_init(). Use this code in v6. v5: --- 1. Refine the use of data__hpp_register() in ui_init() according to Jiri's suggestion. v4: --- 1. Rename the new option from '--noisy' to '--cycles-hist' 2. Remove the option '-n'. 3. Only update the spark value and stats when '--cycles-hist' is enabled. 4. Remove the code of printing '..'. v3: --- 1. Move the histogram to a separate column 2. Move the svals[] out of struct stats v2: --- Jiri got a compile error, CC builtin-diff.o builtin-diff.c: In function ‘compute_cycles_diff’: builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value] 712 | labs(pair->block_info->cycles_spark[i] - | ^~~~ Because the result of u64 - u64 is still u64. Now we change the type of cycles_spark[] to s64. 
Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
253 lines
6.9 KiB
Plaintext
253 lines
6.9 KiB
Plaintext
# Core object list. Everything here is appended to perf-y directly, except
# entries written as perf-$(CONFIG_FOO): those land in perf-y only when
# CONFIG_FOO expands to "y".
# NOTE(review): perf-y is presumably consumed by the including build
# framework as the set of objects to build — confirm against the includer.
perf-y += annotate.o
perf-y += block-range.o
perf-y += build-id.o
perf-y += cacheline.o
perf-y += config.o
perf-y += copyfile.o
perf-y += ctype.o
perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
perf-y += perf_event_attr_fprintf.o
perf-y += evswitch.o
perf-y += find_bit.o
perf-y += get_current_dir_name.o
perf-y += kallsyms.o
perf-y += levenshtein.o
perf-y += llvm-utils.o
perf-y += mmap.o
perf-y += memswap.o
perf-y += parse-events.o
perf-y += perf_regs.o
perf-y += path.o
perf-y += print_binary.o
perf-y += rlimit.o
perf-y += argv_split.o
perf-y += rbtree.o
perf-y += libstring.o
perf-y += bitmap.o
perf-y += hweight.o
perf-y += smt.o
perf-y += strbuf.o
perf-y += string.o
perf-y += strlist.o
perf-y += strfilter.o
perf-y += top.o
perf-y += usage.o
perf-y += dso.o
perf-y += dsos.o
perf-y += symbol.o
perf-y += symbol_fprintf.o
perf-y += color.o
perf-y += color_config.o
perf-y += metricgroup.o
perf-y += header.o
perf-y += callchain.o
perf-y += values.o
perf-y += debug.o
perf-y += machine.o
perf-y += map.o
perf-y += pstack.o
perf-y += session.o
perf-y += sample-raw.o
perf-y += s390-sample-raw.o
perf-$(CONFIG_TRACE) += syscalltbl.o
perf-y += ordered-events.o
perf-y += namespaces.o
perf-y += comm.o
perf-y += thread.o
perf-y += thread_map.o
perf-y += trace-event-parse.o
perf-y += parse-events-flex.o
perf-y += parse-events-bison.o
perf-y += pmu.o
perf-y += pmu-flex.o
perf-y += pmu-bison.o
perf-y += trace-event-read.o
perf-y += trace-event-info.o
perf-y += trace-event-scripting.o
perf-y += trace-event.o
perf-y += svghelper.o
perf-y += sort.o
perf-y += hist.o
perf-y += util.o
perf-y += cpumap.o
perf-y += cputopo.o
perf-y += cgroup.o
perf-y += target.o
perf-y += rblist.o
perf-y += intlist.o
perf-y += vdso.o
perf-y += counts.o
perf-y += stat.o
perf-y += stat-shadow.o
perf-y += stat-display.o
perf-y += record.o
perf-y += srcline.o
perf-y += srccode.o
perf-y += synthetic-events.o
perf-y += data.o
perf-y += tsc.o
perf-y += cloexec.o
perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
perf-y += spark.o
# Objects added only when CONFIG_AUXTRACE expands to "y".
# NOTE(review): entries ending in "/" are presumably subdirectories that the
# build framework descends into — confirm.
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += arm-spe.o
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o

# The cs-etm entries additionally require CONFIG_LIBOPENCSD to be set
# (non-empty), on top of CONFIG_AUXTRACE being "y".
ifdef CONFIG_LIBOPENCSD
perf-$(CONFIG_AUXTRACE) += cs-etm.o
perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
endif
# Further objects appended to perf-y unconditionally.
perf-y += parse-branch-options.o
perf-y += dump-insn.o
perf-y += parse-regs-options.o
perf-y += term.o
perf-y += help-unknown-cmd.o
perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
perf-y += expr-bison.o
perf-y += branch.o
perf-y += mem2node.o
# Objects gated on optional features: each is added to perf-y only when the
# named CONFIG_* variable expands to "y".
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
perf-$(CONFIG_LIBELF) += probe-event.o

# When CONFIG_LIBELF is unset, symbol-minimal.o is built instead of the
# symbol-elf.o entry above.
ifndef CONFIG_LIBELF
perf-y += symbol-minimal.o
endif

# setns.o is built only when CONFIG_SETNS is unset.
# NOTE(review): presumably a local replacement for a missing setns() — confirm.
ifndef CONFIG_SETNS
perf-y += setns.o
endif
# Objects added only when CONFIG_DWARF expands to "y".
perf-$(CONFIG_DWARF) += probe-finder.o
perf-$(CONFIG_DWARF) += dwarf-aux.o
perf-$(CONFIG_DWARF) += dwarf-regs.o

# Unwinder objects, each gated on its own CONFIG_* switch.
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o

# Added only when CONFIG_LIBBABELTRACE expands to "y".
perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
# NOTE(review): entries ending in "/" are presumably subdirectories that the
# build framework descends into — confirm.
perf-y += scripting-engines/

# Compression objects, one per CONFIG_* switch.
perf-$(CONFIG_ZLIB) += zlib.o
perf-$(CONFIG_LZMA) += lzma.o
perf-$(CONFIG_ZSTD) += zstd.o

perf-$(CONFIG_LIBCAP) += cap.o

perf-y += demangle-java.o
perf-y += demangle-rust.o

# These objects need CONFIG_JITDUMP to be set (non-empty) and, in addition,
# the per-line CONFIG_LIBELF / CONFIG_DWARF switch to expand to "y".
ifdef CONFIG_JITDUMP
perf-$(CONFIG_LIBELF) += jitdump.o
perf-$(CONFIG_LIBELF) += genelf.o
perf-$(CONFIG_DWARF) += genelf_debug.o
endif

perf-y += perf-hooks.o

perf-$(CONFIG_LIBBPF) += bpf-event.o

perf-$(CONFIG_CXX) += c++/
# Per-object compiler flags: append preprocessor defines whose values are
# built from ETC_PERFCONFIG_SQ and perf_include_dir_SQ via BUILD_STR.
# NOTE(review): CFLAGS_<object> variables are presumably picked up by the
# build framework when compiling that object — confirm.
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"

# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
# Generate the event-parser lexer with flex, writing the C file to $@ and a
# header next to it. The bison output is a prerequisite, so the parser is
# always generated before the lexer.
# NOTE(review): presumably because the lexer includes the bison-generated
# header — confirm.
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
	$(call rule_mkdir)
	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<

# Generate the event parser with bison: -v writes a report file, -d also
# emits a header, -p sets the external symbol prefix to parse_events_.
$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
	$(call rule_mkdir)
	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
# Generate the expression parser with bison: -v writes a report file, -d also
# emits a header, -p sets the external symbol prefix to expr__.
$(OUTPUT)util/expr-bison.c: util/expr.y
	$(call rule_mkdir)
	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
# Generate the PMU lexer with flex, writing the C file to $@ and a header
# next to it. The bison output is a prerequisite, so the parser is always
# generated before the lexer.
# NOTE(review): presumably because the lexer includes the bison-generated
# header — confirm.
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
	$(call rule_mkdir)
	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h $<

# Generate the PMU parser with bison: -v writes a report file, -d also emits
# a header, -p sets the external symbol prefix to perf_pmu_.
$(OUTPUT)util/pmu-bison.c: util/pmu.y
	$(call rule_mkdir)
	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d -o $@ -p perf_pmu_
# Per-object flags for the flex/bison objects: -w disables all compiler
# warnings; the bison objects additionally get the YY* defines below.
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w

# Prerequisite-only rules: parse-events.o and pmu.o are not built until the
# corresponding generated lexer and parser sources exist.
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
# Per-object flags for the objects this file builds from ../lib sources:
# unused-parameter warnings are turned off for each of them.
# NOTE(review): the -DETC_PERFCONFIG define repeated on these lines matches
# the one on CFLAGS_config.o and looks copied from it — confirm whether these
# objects actually need it before removing.
CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
# Force-include the file at $(OUTPUT)PERF-VERSION-FILE when compiling header.o.
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
# Objects under $(OUTPUT)util/ that are compiled from sources living in
# ../lib rather than in this directory. Every rule has the same shape: call
# rule_mkdir, then if_changed_dep with cc_o_c.
# NOTE(review): rule_mkdir, if_changed_dep and FORCE are defined outside this
# file; FORCE presumably makes the recipe run on every build so that
# if_changed_dep decides whether to recompile — confirm.
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

# Built as libstring.o, not string.o: perf-y also lists a separate string.o.
$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)

$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
	$(call rule_mkdir)
	$(call if_changed_dep,cc_o_c)