perf tools fixes for 5.11:

- Fix 'CPU too large' error in Intel PT.
 
 - Correct event attribute sizes in 'perf inject'.
 
 - Sync build_bug.h and kvm.h kernel copies.
 
 - Fix bpf.h header include directive in 5sec.c 'perf trace' bpf example.
 
 - libperf tests fixes.
 
 - Fix shadow stat 'perf test' for non-bash shells.
 
 - Take cgroups into account for shadow stats in 'perf stat'.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 
 Test results:
 
 The first ones are container-based builds of tools/perf with and without libelf
 support.  Where clang is available, it is also used to build perf with/without
 libelf, and to build with LIBCLANGLLVM=1 (built-in clang) with gcc and clang
 when clang and its devel libraries are installed.
 
 The objtool and samples/bpf/ builds are disabled now that I'm switching from
 using the sources in a local volume to fetching them from an HTTP server and
 building them inside the container, to make it easier to build in a container
 cluster. Those will come back later.
 
 Several are cross builds (the ones with -x-ARCH and the android one), and those
 may not have all the features built due to the lack of multi-arch devel
 packages, which are available and being used so far on just a few, like
 debian:experimental-x-{arm64,mipsel}.
 
 The 'perf test' one will perform a variety of tests exercising
 tools/perf/util/, tools/lib/{bpf,traceevent,etc}, as well as run perf commands
 with a variety of command line event specifications to then intercept the
 sys_perf_event syscall to check that the perf_event_attr fields are set up as
 expected, among a variety of other unit tests.
 
 Then there are the 'make -C tools/perf build-test' ones, which build tools/perf/
 with a variety of feature sets, exercising the build with an incomplete set of
 features as well as with a complete one. The plan is to have it run on each
 of the containers mentioned above, using some container orchestration
 infrastructure. Get in contact if you are interested in helping to get this in
 place.
 
   $ grep "model name" -m1 /proc/cpuinfo
   model name: AMD Ryzen 9 3900X 12-Core Processor
   # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.11.0-rc3.tar.xz
   # dm
    1    66.93 alpine:3.4                    : Ok   gcc (Alpine 5.3.0) 5.3.0, clang version 3.8.0 (tags/RELEASE_380/final)
    2    68.65 alpine:3.5                    : Ok   gcc (Alpine 6.2.1) 6.2.1 20160822, clang version 3.8.1 (tags/RELEASE_381/final)
    3    73.00 alpine:3.6                    : Ok   gcc (Alpine 6.3.0) 6.3.0, clang version 4.0.0 (tags/RELEASE_400/final)
    4    79.04 alpine:3.7                    : Ok   gcc (Alpine 6.4.0) 6.4.0, Alpine clang version 5.0.0 (tags/RELEASE_500/final) (based on LLVM 5.0.0)
    5    79.71 alpine:3.8                    : Ok   gcc (Alpine 6.4.0) 6.4.0, Alpine clang version 5.0.1 (tags/RELEASE_501/final) (based on LLVM 5.0.1)
    6    82.51 alpine:3.9                    : Ok   gcc (Alpine 8.3.0) 8.3.0, Alpine clang version 5.0.1 (tags/RELEASE_502/final) (based on LLVM 5.0.1)
    7   103.45 alpine:3.10                   : Ok   gcc (Alpine 8.3.0) 8.3.0, Alpine clang version 8.0.0 (tags/RELEASE_800/final) (based on LLVM 8.0.0)
    8   113.86 alpine:3.11                   : Ok   gcc (Alpine 9.3.0) 9.3.0, Alpine clang version 9.0.0 (https://git.alpinelinux.org/aports f7f0d2c2b8bcd6a5843401a9a702029556492689) (based on LLVM 9.0.0)
    9   109.31 alpine:3.12                   : Ok   gcc (Alpine 9.3.0) 9.3.0, Alpine clang version 10.0.0 (https://gitlab.alpinelinux.org/alpine/aports.git 7445adce501f8473efdb93b17b5eaf2f1445ed4c)
   10   113.90 alpine:edge                   : Ok   gcc (Alpine 10.2.0) 10.2.0, Alpine clang version 10.0.1
   11    66.76 alt:p8                        : Ok   x86_64-alt-linux-gcc (GCC) 5.3.1 20151207 (ALT p8 5.3.1-alt3.M80P.1), clang version 3.8.0 (tags/RELEASE_380/final)
   12    83.71 alt:p9                        : Ok   x86_64-alt-linux-gcc (GCC) 8.4.1 20200305 (ALT p9 8.4.1-alt0.p9.1), clang version 10.0.0
   13    80.70 alt:sisyphus                  : Ok   x86_64-alt-linux-gcc (GCC) 9.3.1 20200518 (ALT Sisyphus 9.3.1-alt1), clang version 10.0.1
   14    62.75 amazonlinux:1                 : Ok   gcc (GCC) 7.2.1 20170915 (Red Hat 7.2.1-2), clang version 3.6.2 (tags/RELEASE_362/final)
   15    97.65 amazonlinux:2                 : Ok   gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12), clang version 7.0.1 (Amazon Linux 2 7.0.1-1.amzn2.0.2)
   16    21.18 android-ndk:r12b-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
   17    21.07 android-ndk:r15c-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
   18    25.83 centos:6                      : Ok   gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
   19    30.65 centos:7                      : Ok   gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
   20    93.44 centos:8                      : Ok   gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5), clang version 10.0.1 (Red Hat 10.0.1-1.module_el8.3.0+467+cb298d5b)
   21    60.64 clearlinux:latest             : Ok   gcc (Clear Linux OS for Intel Architecture) 10.2.1 20201217 releases/gcc-10.2.0-643-g7cbb07d2fc, clang version 10.0.1
   22    74.57 debian:8                      : Ok   gcc (Debian 4.9.2-10+deb8u2) 4.9.2, Debian clang version 3.5.0-10 (tags/RELEASE_350/final) (based on LLVM 3.5.0)
   23    75.40 debian:9                      : Ok   gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, clang version 3.8.1-24 (tags/RELEASE_381/final)
   24    72.75 debian:10                     : Ok   gcc (Debian 8.3.0-6) 8.3.0, clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)
   25    72.36 debian:experimental           : Ok   gcc (Debian 10.2.1-6) 10.2.1 20210110, Debian clang version 11.0.1-2
   26    32.35 debian:experimental-x-arm64   : Ok   aarch64-linux-gnu-gcc (Debian 10.2.1-6) 10.2.1 20210110
   27    28.65 debian:experimental-x-mips64  : Ok   mips64-linux-gnuabi64-gcc (Debian 10.2.1-3) 10.2.1 20201224
   28    13.79 debian:experimental-x-mipsel  : FAIL mipsel-linux-gnu-gcc (Debian 10.2.1-3) 10.2.1 20201224
 
       CC       /tmp/build/perf/util/map.o
     util/map.c: In function 'map__new':
     util/map.c:109:5: error: '%s' directive output may be truncated writing between 1 and 2147483645 bytes into a region of size 4096 [-Werror=format-truncation=]
       109 |    "%s/platforms/%s/arch-%s/usr/lib/%s",
           |     ^~
     In file included from /usr/mipsel-linux-gnu/include/stdio.h:867,
                      from util/symbol.h:11,
                      from util/map.c:2:
     /usr/mipsel-linux-gnu/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 32 or more bytes (assuming 4294967321) into a destination of size 4096
        67 |   return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
           |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        68 |        __bos (__s), __fmt, __va_arg_pack ());
           |        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
   29    29.14 fedora:20                     : Ok   gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7)
   30    30.66 fedora:22                     : Ok   gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6), clang version 3.5.0 (tags/RELEASE_350/final)
   31    66.33 fedora:23                     : Ok   gcc (GCC) 5.3.1 20160406 (Red Hat 5.3.1-6), clang version 3.7.0 (tags/RELEASE_370/final)
   32    77.51 fedora:24                     : Ok   gcc (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1), clang version 3.8.1 (tags/RELEASE_381/final)
   33    25.23 fedora:24-x-ARC-uClibc        : Ok   arc-linux-gcc (ARCompact ISA Linux uClibc toolchain 2017.09-rc2) 7.1.1 20170710
   34    79.68 fedora:25                     : Ok   gcc (GCC) 6.4.1 20170727 (Red Hat 6.4.1-1), clang version 3.9.1 (tags/RELEASE_391/final)
   35    93.09 fedora:26                     : Ok   gcc (GCC) 7.3.1 20180130 (Red Hat 7.3.1-2), clang version 4.0.1 (tags/RELEASE_401/final)
   36    94.12 fedora:27                     : Ok   gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-6), clang version 5.0.2 (tags/RELEASE_502/final)
   37   101.97 fedora:28                     : Ok   gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2), clang version 6.0.1 (tags/RELEASE_601/final)
   38   107.51 fedora:29                     : Ok   gcc (GCC) 8.3.1 20190223 (Red Hat 8.3.1-2), clang version 7.0.1 (Fedora 7.0.1-6.fc29)
   39   111.24 fedora:30                     : Ok   gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2), clang version 8.0.0 (Fedora 8.0.0-3.fc30)
   40    25.85 fedora:30-x-ARC-uClibc        : Ok   arc-linux-gcc (ARCv2 ISA Linux uClibc toolchain 2019.03-rc1) 8.3.1 20190225
   41   110.61 fedora:31                     : Ok   gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2), clang version 9.0.1 (Fedora 9.0.1-4.fc31)
   42    93.78 fedora:32                     : Ok   gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 10.0.1 (Fedora 10.0.1-3.fc32)
   43    91.51 fedora:33                     : Ok   gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9), clang version 11.0.0 (Fedora 11.0.0-2.fc33)
   44    92.75 fedora:34                     : Ok   gcc (GCC) 11.0.0 20210113 (Red Hat 11.0.0-0), clang version 11.0.1 (Fedora 11.0.1-4.fc34)
   45    92.33 fedora:rawhide                : Ok   gcc (GCC) 11.0.0 20210109 (Red Hat 11.0.0-0), clang version 11.0.1 (Fedora 11.0.1-4.fc34)
   46    33.58 gentoo-stage3-amd64:latest    : Ok   gcc (Gentoo 9.3.0-r1 p3) 9.3.0
   47    66.03 mageia:5                      : Ok   gcc (GCC) 4.9.2, clang version 3.5.2 (tags/RELEASE_352/final)
   48    84.73 mageia:6                      : Ok   gcc (Mageia 5.5.0-1.mga6) 5.5.0, clang version 3.9.1 (tags/RELEASE_391/final)
   49    98.35 manjaro:latest                : Ok   gcc (GCC) 10.2.0, clang version 10.0.1
   50   223.15 openmandriva:cooker           : Ok   gcc (GCC) 10.2.0 20200723 (OpenMandriva), OpenMandriva 11.0.0-1 clang version 11.0.0 (/builddir/build/BUILD/llvm-project-llvmorg-11.0.0/clang 63e22714ac938c6b537bd958f70680d3331a2030)
   51   117.30 opensuse:15.0                 : Ok   gcc (SUSE Linux) 7.4.1 20190905 [gcc-7-branch revision 275407], clang version 5.0.1 (tags/RELEASE_501/final 312548)
   52   124.82 opensuse:15.1                 : Ok   gcc (SUSE Linux) 7.5.0, clang version 7.0.1 (tags/RELEASE_701/final 349238)
   53   113.33 opensuse:15.2                 : Ok   gcc (SUSE Linux) 7.5.0, clang version 9.0.1
   54   106.17 opensuse:42.3                 : Ok   gcc (SUSE Linux) 4.8.5, clang version 3.8.0 (tags/RELEASE_380/final 262553)
   55   108.15 opensuse:tumbleweed           : Ok   gcc (SUSE Linux) 10.2.1 20200825 [revision c0746a1beb1ba073c7981eb09f55b3d993b32e5c], clang version 10.0.1
   56    25.57 oraclelinux:6                 : Ok   gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1)
   57    30.86 oraclelinux:7                 : Ok   gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44.0.3)
   58    91.75 oraclelinux:8                 : Ok   gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5.0.1), clang version 10.0.1 (Red Hat 10.0.1-1.0.1.module+el8.3.0+7827+89335dbf)
   59    27.64 ubuntu:12.04                  : Ok   gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3, Ubuntu clang version 3.0-6ubuntu3 (tags/RELEASE_30/final) (based on LLVM 3.0)
   60    29.65 ubuntu:14.04                  : Ok   gcc (Ubuntu 4.8.4-2ubuntu1~14.04.4) 4.8.4
   61    75.65 ubuntu:16.04                  : Ok   gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609, clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)
   62    25.57 ubuntu:16.04-x-arm            : Ok   arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   63    25.52 ubuntu:16.04-x-arm64          : Ok   aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   64    25.01 ubuntu:16.04-x-powerpc        : Ok   powerpc-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   65    25.51 ubuntu:16.04-x-powerpc64      : Ok   powerpc64-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   66    25.70 ubuntu:16.04-x-powerpc64el    : Ok   powerpc64le-linux-gnu-gcc (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   67    24.95 ubuntu:16.04-x-s390           : Ok   s390x-linux-gnu-gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
   68    87.96 ubuntu:18.04                  : Ok   gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0, clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
   69    27.40 ubuntu:18.04-x-arm            : Ok   arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0
   70    27.14 ubuntu:18.04-x-arm64          : Ok   aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 7.5.0
   71    22.68 ubuntu:18.04-x-m68k           : Ok   m68k-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   72    26.52 ubuntu:18.04-x-powerpc        : Ok   powerpc-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   73    28.97 ubuntu:18.04-x-powerpc64      : Ok   powerpc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   74    28.54 ubuntu:18.04-x-powerpc64el    : Ok   powerpc64le-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   75   163.57 ubuntu:18.04-x-riscv64        : Ok   riscv64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   76    24.07 ubuntu:18.04-x-s390           : Ok   s390x-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   77    26.77 ubuntu:18.04-x-sh4            : Ok   sh4-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   78    24.00 ubuntu:18.04-x-sparc64        : Ok   sparc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
   79    69.36 ubuntu:19.10                  : Ok   gcc (Ubuntu 9.2.1-9ubuntu2) 9.2.1 20191008, clang version 8.0.1-3build1 (tags/RELEASE_801/final)
   80    27.07 ubuntu:19.10-x-alpha          : Ok   alpha-linux-gnu-gcc (Ubuntu 9.2.1-9ubuntu1) 9.2.1 20191008
   81    24.29 ubuntu:19.10-x-hppa           : Ok   hppa-linux-gnu-gcc (Ubuntu 9.2.1-9ubuntu1) 9.2.1 20191008
   82    74.99 ubuntu:20.04                  : Ok   gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, clang version 10.0.0-4ubuntu1
   83    30.49 ubuntu:20.04-x-powerpc64el    : Ok   powerpc64le-linux-gnu-gcc (Ubuntu 10.2.0-5ubuntu1~20.04) 10.2.0
   84    73.54 ubuntu:20.10                  : Ok   gcc (Ubuntu 10.2.0-13ubuntu1) 10.2.0, Ubuntu clang version 11.0.0-2
   $
 
   # uname -a
   Linux quaco 5.10.7-100.fc32.x86_64 #1 SMP Tue Jan 12 20:25:28 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
   # git log --oneline -1
   648b054a46 perf inject: Correct event attribute sizes
   # perf version --build-options
   perf version 5.11.rc3.g648b054a4647
                    dwarf: [ on  ]  # HAVE_DWARF_SUPPORT
       dwarf_getlocations: [ on  ]  # HAVE_DWARF_GETLOCATIONS_SUPPORT
                    glibc: [ on  ]  # HAVE_GLIBC_SUPPORT
            syscall_table: [ on  ]  # HAVE_SYSCALL_TABLE_SUPPORT
                   libbfd: [ on  ]  # HAVE_LIBBFD_SUPPORT
                   libelf: [ on  ]  # HAVE_LIBELF_SUPPORT
                  libnuma: [ on  ]  # HAVE_LIBNUMA_SUPPORT
   numa_num_possible_cpus: [ on  ]  # HAVE_LIBNUMA_SUPPORT
                  libperl: [ on  ]  # HAVE_LIBPERL_SUPPORT
                libpython: [ on  ]  # HAVE_LIBPYTHON_SUPPORT
                 libslang: [ on  ]  # HAVE_SLANG_SUPPORT
                libcrypto: [ on  ]  # HAVE_LIBCRYPTO_SUPPORT
                libunwind: [ on  ]  # HAVE_LIBUNWIND_SUPPORT
       libdw-dwarf-unwind: [ on  ]  # HAVE_DWARF_SUPPORT
                     zlib: [ on  ]  # HAVE_ZLIB_SUPPORT
                     lzma: [ on  ]  # HAVE_LZMA_SUPPORT
                get_cpuid: [ on  ]  # HAVE_AUXTRACE_SUPPORT
                      bpf: [ on  ]  # HAVE_LIBBPF_SUPPORT
                      aio: [ on  ]  # HAVE_AIO_SUPPORT
                     zstd: [ on  ]  # HAVE_ZSTD_SUPPORT
                  libpfm4: [ OFF ]  # HAVE_LIBPFM
   # perf test
    1: vmlinux symtab matches kallsyms                                 : Ok
    2: Detect openat syscall event                                     : Ok
    3: Detect openat syscall event on all cpus                         : Ok
    4: Read samples using the mmap interface                           : Ok
    5: Test data source output                                         : Ok
    6: Parse event definition strings                                  : Ok
    7: Simple expression parser                                        : Ok
    8: PERF_RECORD_* events & perf_sample fields                       : Ok
    9: Parse perf pmu format                                           : Ok
   10: PMU events                                                      :
   10.1: PMU event table sanity                                        : Ok
   10.2: PMU event map aliases                                         : Ok
   10.3: Parsing of PMU event table metrics                            : Ok
   10.4: Parsing of PMU event table metrics with fake PMUs             : Ok
   11: DSO data read                                                   : Ok
   12: DSO data cache                                                  : Ok
   13: DSO data reopen                                                 : Ok
   14: Roundtrip evsel->name                                           : Ok
   15: Parse sched tracepoints fields                                  : Ok
   16: syscalls:sys_enter_openat event fields                          : Ok
   17: Setup struct perf_event_attr                                    : Ok
   18: Match and link multiple hists                                   : Ok
   19: 'import perf' in python                                         : Ok
   20: Breakpoint overflow signal handler                              : Ok
   21: Breakpoint overflow sampling                                    : Ok
   22: Breakpoint accounting                                           : Ok
   23: Watchpoint                                                      :
   23.1: Read Only Watchpoint                                          : Skip (missing hardware support)
   23.2: Write Only Watchpoint                                         : Ok
   23.3: Read / Write Watchpoint                                       : Ok
   23.4: Modify Watchpoint                                             : Ok
   24: Number of exit events of a simple workload                      : Ok
   25: Software clock events period values                             : Ok
   26: Object code reading                                             : Ok
   27: Sample parsing                                                  : Ok
   28: Use a dummy software event to keep tracking                     : Ok
   29: Parse with no sample_id_all bit set                             : Ok
   30: Filter hist entries                                             : Ok
   31: Lookup mmap thread                                              : Ok
   32: Share thread maps                                               : Ok
   33: Sort output of hist entries                                     : Ok
   34: Cumulate child hist entries                                     : Ok
   35: Track with sched_switch                                         : Ok
   36: Filter fds with revents mask in a fdarray                       : Ok
   37: Add fd to a fdarray, making it autogrow                         : Ok
   38: kmod_path__parse                                                : Ok
   39: Thread map                                                      : Ok
   40: LLVM search and compile                                         :
   40.1: Basic BPF llvm compile                                        : Ok
   40.2: kbuild searching                                              : Ok
   40.3: Compile source for BPF prologue generation                    : Ok
   40.4: Compile source for BPF relocation                             : Ok
   41: Session topology                                                : Ok
   42: BPF filter                                                      :
   42.1: Basic BPF filtering                                           : Ok
   42.2: BPF pinning                                                   : Ok
   42.3: BPF prologue generation                                       : Ok
   42.4: BPF relocation checker                                        : Ok
   43: Synthesize thread map                                           : Ok
   44: Remove thread map                                               : Ok
   45: Synthesize cpu map                                              : Ok
   46: Synthesize stat config                                          : Ok
   47: Synthesize stat                                                 : Ok
   48: Synthesize stat round                                           : Ok
   49: Synthesize attr update                                          : Ok
   50: Event times                                                     : Ok
   51: Read backward ring buffer                                       : Ok
   52: Print cpu map                                                   : Ok
   53: Merge cpu map                                                   : Ok
   54: Probe SDT events                                                : Ok
   55: is_printable_array                                              : Ok
   56: Print bitmap                                                    : Ok
   57: perf hooks                                                      : Ok
   58: builtin clang support                                           : Skip (not compiled in)
   59: unit_number__scnprintf                                          : Ok
   60: mem2node                                                        : Ok
   61: time utils                                                      : Ok
   62: Test jit_write_elf                                              : Ok
   63: Test libpfm4 support                                            : Skip (not compiled in)
   64: Test api io                                                     : Ok
   65: maps__merge_in                                                  : Ok
   66: Demangle Java                                                   : Ok
   67: Parse and process metrics                                       : Ok
   68: PE file support                                                 : Ok
   69: Event expansion for cgroups                                     : Ok
   70: Convert perf time to TSC                                        : Ok
   71: x86 rdpmc                                                       : Ok
   72: DWARF unwind                                                    : Ok
   73: x86 instruction decoder - new instructions                      : Ok
   74: Intel PT packet decoder                                         : Ok
   75: x86 bp modify                                                   : Ok
   76: probe libc's inet_pton & backtrace it with ping                 : Ok
   77: Use vfs_getname probe to get syscall args filenames             : Ok
   78: Check Arm CoreSight trace data recording and synthesized samples: Skip
   79: perf stat metrics (shadow stat) test                            : Ok
   80: build id cache operations                                       : Ok
   81: Add vfs_getname probe to get syscall args filenames             : Ok
   82: Check open filename arg using perf trace + vfs_getname          : Ok
   83: Zstd perf.data compression/decompression                        : Ok
 
   $ make -C tools/perf build-test
   make: Entering directory '/home/acme/git/perf/tools/perf'
   - tarpkg: ./tests/perf-targz-src-pkg .
            make_no_libpython_O: make NO_LIBPYTHON=1
                  make_no_sdt_O: make NO_SDT=1
                    make_tags_O: make tags
                 make_install_O: make install
             make_install_bin_O: make install-bin
                   make_debug_O: make DEBUG=1
   make_no_libdw_dwarf_unwind_O: make NO_LIBDW_DWARF_UNWIND=1
               make_no_libelf_O: make NO_LIBELF=1
                  make_cscope_O: make cscope
            make_no_backtrace_O: make NO_BACKTRACE=1
              make_no_libnuma_O: make NO_LIBNUMA=1
                   make_no_ui_O: make NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
                 make_no_newt_O: make NO_NEWT=1
         make_with_babeltrace_O: make LIBBABELTRACE=1
        make_util_pmu_bison_o_O: make util/pmu-bison.o
            make_no_libunwind_O: make NO_LIBUNWIND=1
         make_no_libbpf_DEBUG_O: make NO_LIBBPF=1 DEBUG=1
                     make_doc_O: make doc
                  make_perf_o_O: make perf.o
                 make_no_gtk2_O: make NO_GTK2=1
          make_with_clangllvm_O: make LIBCLANGLLVM=1
               make_clean_all_O: make clean all
             make_no_demangle_O: make NO_DEMANGLE=1
               make_with_gtk2_O: make GTK2=1
              make_util_map_o_O: make util/map.o
                    make_pure_O: make
            make_no_libbionic_O: make NO_LIBBIONIC=1
             make_no_libaudit_O: make NO_LIBAUDIT=1
               make_no_libbpf_O: make NO_LIBBPF=1
    make_install_prefix_slash_O: make install prefix=/tmp/krava/
                    make_help_O: make help
          make_no_syscall_tbl_O: make NO_SYSCALL_TABLE=1
              make_no_scripts_O: make NO_LIBPYTHON=1 NO_LIBPERL=1
                 make_minimal_O: make NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 NO_LIBCAP=1 NO_SYSCALL_TABLE=1
            make_no_libcrypto_O: make NO_LIBCRYPTO=1
                  make_static_O: make LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1
          make_install_prefix_O: make install prefix=/tmp/krava
             make_no_auxtrace_O: make NO_AUXTRACE=1
            make_with_libpfm4_O: make LIBPFM4=1
              make_no_libperl_O: make NO_LIBPERL=1
                make_no_slang_O: make NO_SLANG=1
   OK
   make: Leaving directory '/home/acme/git/perf/tools/perf'
   $
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYASljgAKCRCyPKLppCJ+
 J/E/AQCOGFqF7UmEzuuTecWeeBNCwVyD3woHLU13ll/e5VLNggD/YD9t8CZS+vwy
 21yL4/yXZloLFE48OCLRNWeq91FL/gs=
 =uZDD
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-2021-01-17' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix 'CPU too large' error in Intel PT

 - Correct event attribute sizes in 'perf inject'

 - Sync build_bug.h and kvm.h kernel copies

 - Fix bpf.h header include directive in 5sec.c 'perf trace' bpf example

 - libperf tests fixes

 - Fix shadow stat 'perf test' for non-bash shells

 - Take cgroups into account for shadow stats in 'perf stat'

* tag 'perf-tools-fixes-2021-01-17' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf inject: Correct event attribute sizes
  perf intel-pt: Fix 'CPU too large' error
  perf stat: Take cgroups into account for shadow stats
  perf stat: Introduce struct runtime_stat_data
  libperf tests: Fail when failing to get a tracepoint id
  libperf tests: If a test fails return non-zero
  libperf tests: Avoid uninitialized variable warning
  perf test: Fix shadow stat test for non-bash shells
  tools headers: Synchronize linux/build_bug.h with the kernel sources
  tools headers UAPI: Sync kvm.h headers with the kernel sources
  perf bpf examples: Fix bpf.h header include directive in 5sec.c example
This commit is contained in:
Linus Torvalds 2021-01-17 13:14:46 -08:00
commit e2da783614
12 changed files with 224 additions and 208 deletions

View File

@ -79,9 +79,4 @@
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
#endif // static_assert
#ifdef __GENKSYMS__
/* genksyms gets confused by _Static_assert */
#define _Static_assert(expr, ...)
#endif
#endif /* _LINUX_BUILD_BUG_H */

View File

@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_X86_RDMSR 29
#define KVM_EXIT_X86_WRMSR 30
#define KVM_EXIT_DIRTY_RING_FULL 31
#define KVM_EXIT_AP_RESET_HOLD 32
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@ -573,6 +574,7 @@ struct kvm_vapic_addr {
#define KVM_MP_STATE_CHECK_STOP 6
#define KVM_MP_STATE_OPERATING 7
#define KVM_MP_STATE_LOAD 8
#define KVM_MP_STATE_AP_RESET_HOLD 9
struct kvm_mp_state {
__u32 mp_state;

View File

@ -27,5 +27,5 @@ int main(int argc, char **argv)
perf_cpu_map__put(cpus);
__T_END;
return 0;
return tests_failed == 0 ? 0 : -1;
}

View File

@ -208,13 +208,13 @@ static int test_mmap_thread(void)
char path[PATH_MAX];
int id, err, pid, go_pipe[2];
union perf_event *event;
char bf;
int count = 0;
snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id",
sysfs__mountpoint());
if (filename__read_int(path, &id)) {
tests_failed++;
fprintf(stderr, "error: failed to get tracepoint id: %s\n", path);
return -1;
}
@ -229,6 +229,7 @@ static int test_mmap_thread(void)
pid = fork();
if (!pid) {
int i;
char bf;
read(go_pipe[0], &bf, 1);
@ -266,7 +267,7 @@ static int test_mmap_thread(void)
perf_evlist__enable(evlist);
/* kick the child and wait for it to finish */
write(go_pipe[1], &bf, 1);
write(go_pipe[1], "A", 1);
waitpid(pid, NULL, 0);
/*
@ -409,5 +410,5 @@ int main(int argc, char **argv)
test_mmap_cpus();
__T_END;
return 0;
return tests_failed == 0 ? 0 : -1;
}

View File

@ -131,5 +131,5 @@ int main(int argc, char **argv)
test_stat_thread_enable();
__T_END;
return 0;
return tests_failed == 0 ? 0 : -1;
}

View File

@ -27,5 +27,5 @@ int main(int argc, char **argv)
perf_thread_map__put(threads);
__T_END;
return 0;
return tests_failed == 0 ? 0 : -1;
}

View File

@ -39,7 +39,7 @@
Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
*/
#include <bpf/bpf.h>
#include <bpf.h>
#define NSEC_PER_SEC 1000000000L

View File

@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2
test_global_aggr()
{
local cyc
perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \
grep -e cycles -e instructions | \
while read num evt hash ipc rest
do
# skip not counted events
if [[ $num == "<not" ]]; then
if [ "$num" = "<not" ]; then
continue
fi
# save cycles count
if [[ $evt == "cycles" ]]; then
if [ "$evt" = "cycles" ]; then
cyc=$num
continue
fi
# skip if no cycles
if [[ -z $cyc ]]; then
if [ -z "$cyc" ]; then
continue
fi
# use printf for rounding and a leading zero
local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
if [[ $ipc != $res ]]; then
res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
if [ "$ipc" != "$res" ]; then
echo "IPC is different: $res != $ipc ($num / $cyc)"
exit 1
fi
@ -42,32 +40,32 @@ test_global_aggr()
test_no_aggr()
{
declare -A results
perf stat -a -A --no-big-num -e cycles,instructions sleep 1 2>&1 | \
grep ^CPU | \
while read cpu num evt hash ipc rest
do
# skip not counted events
if [[ $num == "<not" ]]; then
if [ "$num" = "<not" ]; then
continue
fi
# save cycles count
if [[ $evt == "cycles" ]]; then
results[$cpu]=$num
if [ "$evt" = "cycles" ]; then
results="$results $cpu:$num"
continue
fi
cyc=${results##* $cpu:}
cyc=${cyc%% *}
# skip if no cycles
local cyc=${results[$cpu]}
if [[ -z $cyc ]]; then
if [ -z "$cyc" ]; then
continue
fi
# use printf for rounding and a leading zero
local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
if [[ $ipc != $res ]]; then
res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
if [ "$ipc" != "$res" ]; then
echo "IPC is different for $cpu: $res != $ipc ($num / $cyc)"
exit 1
fi

View File

@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session,
attr_offset = lseek(ff.fd, 0, SEEK_CUR);
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.size < sizeof(evsel->core.attr)) {
/*
* We are likely in "perf inject" and have read
* from an older file. Update attr size so that
* reader gets the right offset to the ids.
*/
evsel->core.attr.size = sizeof(evsel->core.attr);
}
f_attr = (struct perf_file_attr){
.attr = evsel->core.attr,
.ids = {

View File

@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines,
pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid)
return -1;
@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
pid_t tid)
{
struct thread *thread;
int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);
int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS);
if (cpu < 0)
return -EINVAL;

View File

@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
{
int i, err = -1;
struct perf_cpu_map *map;
int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS);
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;

View File

@ -8,6 +8,7 @@
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include "cgroup.h"
#include <linux/zalloc.h>
/*
@ -28,6 +29,7 @@ struct saved_value {
enum stat_type type;
int ctx;
int cpu;
struct cgroup *cgrp;
struct runtime_stat *stat;
struct stats stats;
u64 metric_total;
@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
if (a->ctx != b->ctx)
return a->ctx - b->ctx;
if (a->cgrp != b->cgrp)
return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
if (a->evsel == NULL && b->evsel == NULL) {
if (a->stat == b->stat)
return 0;
@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
bool create,
enum stat_type type,
int ctx,
struct runtime_stat *st)
struct runtime_stat *st,
struct cgroup *cgrp)
{
struct rblist *rblist;
struct rb_node *nd;
@ -110,10 +116,15 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel,
.type = type,
.ctx = ctx,
.stat = st,
.cgrp = cgrp,
};
rblist = &st->value_list;
/* don't use context info for clock events */
if (type == STAT_NSECS)
dm.ctx = 0;
nd = rblist__find(rblist, &dm);
if (nd)
return container_of(nd, struct saved_value, rb_node);
@ -191,12 +202,18 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
reset_stat(st);
}
struct runtime_stat_data {
int ctx;
struct cgroup *cgrp;
};
static void update_runtime_stat(struct runtime_stat *st,
enum stat_type type,
int ctx, int cpu, u64 count)
int cpu, u64 count,
struct runtime_stat_data *rsd)
{
struct saved_value *v = saved_value_lookup(NULL, cpu, true,
type, ctx, st);
struct saved_value *v = saved_value_lookup(NULL, cpu, true, type,
rsd->ctx, st, rsd->cgrp);
if (v)
update_stats(&v->stats, count);
@ -210,82 +227,86 @@ static void update_runtime_stat(struct runtime_stat *st,
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
int cpu, struct runtime_stat *st)
{
int ctx = evsel_context(counter);
u64 count_ns = count;
struct saved_value *v;
struct runtime_stat_data rsd = {
.ctx = evsel_context(counter),
.cgrp = counter->cgrp,
};
count *= counter->scale;
if (evsel__is_clock(counter))
update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TRANSACTION_START))
update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, ELISION_START))
update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
ctx, cpu, count);
cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
ctx, cpu, count);
cpu, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu, count);
cpu, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
ctx, cpu, count);
cpu, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd);
else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd);
else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd);
else if (perf_stat_evsel__is(counter, APERF))
update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
update_runtime_stat(st, STAT_APERF, cpu, count, &rsd);
if (counter->collect_stat) {
v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st,
rsd.cgrp);
update_stats(&v->stats, count);
if (counter->metric_leader)
v->metric_total += count;
} else if (counter->metric_leader) {
v = saved_value_lookup(counter->metric_leader,
cpu, true, STAT_NONE, 0, st);
cpu, true, STAT_NONE, 0, st, rsd.cgrp);
v->metric_total += count;
v->metric_other++;
}
@ -422,11 +443,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
static double runtime_stat_avg(struct runtime_stat *st,
enum stat_type type, int ctx, int cpu)
enum stat_type type, int cpu,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@ -434,11 +456,12 @@ static double runtime_stat_avg(struct runtime_stat *st,
}
static double runtime_stat_n(struct runtime_stat *st,
enum stat_type type, int ctx, int cpu)
enum stat_type type, int cpu,
struct runtime_stat_data *rsd)
{
struct saved_value *v;
v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp);
if (!v)
return 0.0;
@ -446,16 +469,15 @@ static double runtime_stat_n(struct runtime_stat *st,
}
static void print_stalled_cycles_frontend(struct perf_stat_config *config,
int cpu,
struct evsel *evsel, double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -470,16 +492,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config,
}
static void print_stalled_cycles_backend(struct perf_stat_config *config,
int cpu,
struct evsel *evsel, double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -490,17 +511,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config,
}
static void print_branch_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -511,18 +530,15 @@ static void print_branch_misses(struct perf_stat_config *config,
}
static void print_l1_dcache_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -533,18 +549,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
}
static void print_l1_icache_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -554,17 +567,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -574,17 +585,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -594,17 +603,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
}
static void print_ll_cache_misses(struct perf_stat_config *config,
int cpu,
struct evsel *evsel,
double avg,
int cpu, double avg,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double total, ratio = 0.0;
const char *color;
int ctx = evsel_context(evsel);
total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd);
if (total)
ratio = avg / total * 100.0;
@ -662,56 +669,61 @@ static double sanitize_val(double x)
return x;
}
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
static double td_total_slots(int cpu, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd);
}
static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
static double td_bad_spec(int cpu, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double bad_spec = 0;
double total_slots;
double total;
total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) -
runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) +
runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd);
total_slots = td_total_slots(ctx, cpu, st);
total_slots = td_total_slots(cpu, st, rsd);
if (total_slots)
bad_spec = total / total_slots;
return sanitize_val(bad_spec);
}
static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
static double td_retiring(int cpu, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double retiring = 0;
double total_slots = td_total_slots(ctx, cpu, st);
double total_slots = td_total_slots(cpu, st, rsd);
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
ctx, cpu);
cpu, rsd);
if (total_slots)
retiring = ret_slots / total_slots;
return retiring;
}
static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
static double td_fe_bound(int cpu, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double fe_bound = 0;
double total_slots = td_total_slots(ctx, cpu, st);
double total_slots = td_total_slots(cpu, st, rsd);
double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
ctx, cpu);
cpu, rsd);
if (total_slots)
fe_bound = fetch_bub / total_slots;
return fe_bound;
}
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
static double td_be_bound(int cpu, struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double sum = (td_fe_bound(ctx, cpu, st) +
td_bad_spec(ctx, cpu, st) +
td_retiring(ctx, cpu, st));
double sum = (td_fe_bound(cpu, st, rsd) +
td_bad_spec(cpu, st, rsd) +
td_retiring(cpu, st, rsd));
if (sum == 0)
return 0;
return sanitize_val(1.0 - sum);
@ -722,15 +734,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
* the ratios we need to recreate the sum.
*/
static double td_metric_ratio(int ctx, int cpu,
enum stat_type type,
struct runtime_stat *stat)
static double td_metric_ratio(int cpu, enum stat_type type,
struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
double d = runtime_stat_avg(stat, type, ctx, cpu);
double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) +
runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) +
runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) +
runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd);
double d = runtime_stat_avg(stat, type, cpu, rsd);
if (sum)
return d / sum;
@ -742,34 +754,33 @@ static double td_metric_ratio(int ctx, int cpu,
* We allow two missing.
*/
static bool full_td(int ctx, int cpu,
struct runtime_stat *stat)
static bool full_td(int cpu, struct runtime_stat *stat,
struct runtime_stat_data *rsd)
{
int c = 0;
if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0)
c++;
if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0)
c++;
if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0)
c++;
if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0)
c++;
return c >= 2;
}
static void print_smi_cost(struct perf_stat_config *config,
int cpu, struct evsel *evsel,
static void print_smi_cost(struct perf_stat_config *config, int cpu,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
struct runtime_stat *st,
struct runtime_stat_data *rsd)
{
double smi_num, aperf, cycles, cost = 0.0;
int ctx = evsel_context(evsel);
const char *color = NULL;
smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd);
aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd);
cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd);
if ((cycles == 0) || (aperf == 0))
return;
@ -804,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events,
scale = 1e-9;
} else {
v = saved_value_lookup(metric_events[i], cpu, false,
STAT_NONE, 0, st);
STAT_NONE, 0, st,
metric_events[i]->cgrp);
if (!v)
break;
stats = &v->stats;
@ -930,12 +942,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
const char *color = NULL;
int ctx = evsel_context(evsel);
struct runtime_stat_data rsd = {
.ctx = evsel_context(evsel),
.cgrp = evsel->cgrp,
};
struct metric_event *me;
int num = 1;
if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
if (total) {
ratio = avg / total;
@ -945,12 +960,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
}
total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu);
total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd);
total = max(total, runtime_stat_avg(st,
STAT_STALLED_CYCLES_BACK,
ctx, cpu));
cpu, &rsd));
if (total && avg) {
out->new_line(config, ctxp);
@ -960,8 +974,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio);
}
} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
print_branch_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0)
print_branch_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
} else if (
@ -970,8 +984,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0)
print_l1_dcache_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
@ -980,8 +994,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
print_l1_icache_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0)
print_l1_icache_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
@ -990,8 +1004,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0)
print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
@ -1000,8 +1014,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0)
print_itlb_cache_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
@ -1010,27 +1024,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0)
print_ll_cache_misses(config, cpu, avg, out, st, &rsd);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd);
if (total)
ratio = avg * 100 / total;
if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd);
} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
if (total) {
ratio = avg / total;
@ -1039,7 +1053,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
if (total)
print_metric(config, ctxp, NULL,
@ -1049,8 +1063,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd);
total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
if (total2 < avg)
total2 = avg;
@ -1060,21 +1074,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
if (avg)
ratio = total / avg;
if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0)
print_metric(config, ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
ctx, cpu);
total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd);
if (avg)
ratio = total / avg;
@ -1087,28 +1099,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
double fe_bound = td_fe_bound(ctx, cpu, st);
double fe_bound = td_fe_bound(cpu, st, &rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
double retiring = td_retiring(ctx, cpu, st);
double retiring = td_retiring(cpu, st, &rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
double bad_spec = td_bad_spec(ctx, cpu, st);
double bad_spec = td_bad_spec(cpu, st, &rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
bad_spec * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
double be_bound = td_be_bound(ctx, cpu, st);
double be_bound = td_be_bound(cpu, st, &rsd);
const char *name = "backend bound";
static int have_recovery_bubbles = -1;
@ -1121,43 +1133,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (be_bound > 0.2)
color = PERF_COLOR_RED;
if (td_total_slots(ctx, cpu, st) > 0)
if (td_total_slots(cpu, st, &rsd) > 0)
print_metric(config, ctxp, color, "%8.1f%%", name,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
full_td(ctx, cpu, st)) {
double retiring = td_metric_ratio(ctx, cpu,
STAT_TOPDOWN_RETIRING, st);
full_td(cpu, st, &rsd)) {
double retiring = td_metric_ratio(cpu,
STAT_TOPDOWN_RETIRING, st,
&rsd);
if (retiring > 0.7)
color = PERF_COLOR_GREEN;
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
retiring * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
full_td(ctx, cpu, st)) {
double fe_bound = td_metric_ratio(ctx, cpu,
STAT_TOPDOWN_FE_BOUND, st);
full_td(cpu, st, &rsd)) {
double fe_bound = td_metric_ratio(cpu,
STAT_TOPDOWN_FE_BOUND, st,
&rsd);
if (fe_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
fe_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
full_td(ctx, cpu, st)) {
double be_bound = td_metric_ratio(ctx, cpu,
STAT_TOPDOWN_BE_BOUND, st);
full_td(cpu, st, &rsd)) {
double be_bound = td_metric_ratio(cpu,
STAT_TOPDOWN_BE_BOUND, st,
&rsd);
if (be_bound > 0.2)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
be_bound * 100.);
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
full_td(ctx, cpu, st)) {
double bad_spec = td_metric_ratio(ctx, cpu,
STAT_TOPDOWN_BAD_SPEC, st);
full_td(cpu, st, &rsd)) {
double bad_spec = td_metric_ratio(cpu,
STAT_TOPDOWN_BAD_SPEC, st,
&rsd);
if (bad_spec > 0.1)
color = PERF_COLOR_RED;
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
@ -1165,11 +1177,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
} else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
char unit = 'M';
char unit_buf[10];
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
if (total)
ratio = 1000.0 * avg / total;
@ -1180,7 +1192,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(config, cpu, evsel, out, st);
print_smi_cost(config, cpu, out, st, &rsd);
} else {
num = 0;
}