mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-29 05:55:02 +08:00
ff165628d7
In LBR call stack mode, the depth of the reconstructed LBR call stack is limited to the number of LBR registers. For example, on Skylake, the depth of the reconstructed LBR call stack is always <= 32. # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 6K of event 'cycles' # Event count (approx.): 6487119731 # # Children Self Command Shared Object Symbol # ........ ........ ............... .................. # ................................ 99.97% 99.97% tchain_edit tchain_edit [.] f43 | --99.64%--f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31 f32 f33 f34 f35 f36 f37 f38 f39 f40 f41 f42 f43 For a call stack that is deeper than the LBR limit, HW will overwrite the LBR registers holding the oldest branches. Only partial call stacks can be reconstructed. However, the overwritten LBRs may still be retrievable from the previous sample, because at that moment HW had not yet overwritten the LBR registers. Perf tools can stitch those overwritten LBRs onto the current call stack to get a more complete call stack. To determine whether LBRs can be stitched, perf tools need to compare the current sample with the previous sample. - They should have identical LBR records (same from, to and flags values, and the same physical index of LBR registers). - The search starts from the base-of-stack of the current sample. Once perf decides to stitch the previous LBRs, the corresponding LBR cursor nodes are copied to 'lists'. 'lists' tracks the LBR cursor nodes which are going to be stitched. When the stitching is over, the nodes are not freed immediately. They are moved to 'free_lists', so that the next stitching may reuse the space. Both 'lists' and 'free_lists' are freed when all samples have been processed. Committer notes: Fix the intel-pt.c initialization of the union within 'struct branch_flags', which breaks the build with its unnamed union on older gcc versions. 
Uninline thread__free_stitch_list(), as it grew big and started dragging includes to thread.h, so move it to thread.c where what it needs in terms of headers are already there. This fixes the build in several systems such as debian:experimental when cross building to the MIPS32 architecture, i.e. in the other cases what was needed was being included by sheer luck. In file included from builtin-sched.c:11: util/thread.h: In function 'thread__free_stitch_list': util/thread.h:169:3: error: implicit declaration of function 'free' [-Werror=implicit-function-declaration] 169 | free(pos); | ^~~~ util/thread.h:169:3: error: incompatible implicit declaration of built-in function 'free' [-Werror] util/thread.h:19:1: note: include '<stdlib.h>' or provide a declaration of 'free' 18 | #include "callchain.h" +++ |+#include <stdlib.h> 19 | util/thread.h:174:3: error: incompatible implicit declaration of built-in function 'free' [-Werror] 174 | free(pos); | ^~~~ util/thread.h:174:3: note: include '<stdlib.h>' or provide a declaration of 'free' Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexey Budankov <alexey.budankov@linux.intel.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Pavel Gerasimov <pavel.gerasimov@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Cc: Stephane Eranian <eranian@google.com> Cc: Vitaly Slobodskoy <vitaly.slobodskoy@intel.com> Link: http://lore.kernel.org/lkml/20200319202517.23423-13-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
89 lines
2.0 KiB
C
89 lines
2.0 KiB
C
#ifndef _PERF_BRANCH_H
|
|
#define _PERF_BRANCH_H 1
|
|
/*
|
|
* The linux/stddef.h isn't needed here, but is needed for __always_inline used
|
|
* in files included from uapi/linux/perf_event.h such as
|
|
* /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
|
|
* detected in at least musl libc, used in Alpine Linux. -acme
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/types.h>
|
|
#include "event.h"
|
|
|
|
struct branch_flags {
|
|
union {
|
|
u64 value;
|
|
struct {
|
|
u64 mispred:1;
|
|
u64 predicted:1;
|
|
u64 in_tx:1;
|
|
u64 abort:1;
|
|
u64 cycles:16;
|
|
u64 type:4;
|
|
u64 reserved:40;
|
|
};
|
|
};
|
|
};
|
|
|
|
struct branch_info {
|
|
struct addr_map_symbol from;
|
|
struct addr_map_symbol to;
|
|
struct branch_flags flags;
|
|
char *srcline_from;
|
|
char *srcline_to;
|
|
};
|
|
|
|
struct branch_entry {
|
|
u64 from;
|
|
u64 to;
|
|
struct branch_flags flags;
|
|
};
|
|
|
|
struct branch_stack {
|
|
u64 nr;
|
|
u64 hw_idx;
|
|
struct branch_entry entries[0];
|
|
};
|
|
|
|
/*
|
|
* The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
|
|
* Otherwise, the output format of a sample with branch stack is
|
|
* struct branch_stack {
|
|
* u64 nr;
|
|
* struct branch_entry entries[0];
|
|
* }
|
|
* Check whether the hw_idx is available,
|
|
* and return the corresponding pointer of entries[0].
|
|
*/
|
|
static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
|
|
{
|
|
u64 *entry = (u64 *)sample->branch_stack;
|
|
|
|
entry++;
|
|
if (sample->no_hw_idx)
|
|
return (struct branch_entry *)entry;
|
|
return (struct branch_entry *)(++entry);
|
|
}
|
|
|
|
struct branch_type_stat {
|
|
bool branch_to;
|
|
u64 counts[PERF_BR_MAX];
|
|
u64 cond_fwd;
|
|
u64 cond_bwd;
|
|
u64 cross_4k;
|
|
u64 cross_2m;
|
|
};
|
|
|
|
/*
 * Branch type helpers.  They are only declared here; the definitions are
 * not visible in this header, so the notes below are inferred from the
 * signatures — confirm against the implementation.
 */

/* Takes the statistics to work on plus one branch's flags and addresses. */
void branch_type_count(struct branch_type_stat *st,
		       struct branch_flags *flags,
		       u64 from, u64 to);

/* Returns a string for the given integer branch type. */
const char *branch_type_name(int type);

/* Takes an output stream and the statistics to display. */
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);

/* Takes the statistics plus a caller-supplied buffer 'bf' of 'bfsize'. */
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
|
|
|
|
#endif /* _PERF_BRANCH_H */
|