#ifndef _PERF_BRANCH_H
#define _PERF_BRANCH_H 1
/*
 * The linux/stddef.h isn't needed here, but is needed for the __always_inline
 * used in files included from uapi/linux/perf_event.h, such as
 * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
 * detected in at least musl libc, used in Alpine Linux. -acme
 */
#include <stdio.h>
#include <linux/perf_event.h>
#include <linux/types.h>
#include "util/map_symbol.h"
#include "util/sample.h"

struct branch_flags {
	union {
		u64 value;
		struct {
			u64 mispred:1;
			u64 predicted:1;
			u64 in_tx:1;
			u64 abort:1;
			u64 cycles:16;
			u64 type:4;
			u64 spec:2;
			u64 new_type:4;
			u64 priv:3;
			u64 reserved:31;
		};
	};
};
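
/*
 * Illustrative sketch, not part of the original header: because the
 * bitfields sit in an anonymous union with 'value', a raw 64-bit flags
 * word can be decoded by assigning it to the union. Function name is
 * hypothetical.
 */
static inline void branch_flags__dump_example(u64 raw)
{
	struct branch_flags f = { .value = raw };

	printf("mispred=%u predicted=%u in_tx=%u abort=%u cycles=%u type=%u\n",
	       (unsigned int)f.mispred, (unsigned int)f.predicted,
	       (unsigned int)f.in_tx, (unsigned int)f.abort,
	       (unsigned int)f.cycles, (unsigned int)f.type);
}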

struct branch_info {
	struct addr_map_symbol from;
	struct addr_map_symbol to;
	struct branch_flags flags;
	char *srcline_from;
	char *srcline_to;
};

struct branch_entry {
	u64 from;
	u64 to;
	struct branch_flags flags;
};

struct branch_stack {
	u64 nr;
	u64 hw_idx;
	struct branch_entry entries[];
};
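
/*
 * Illustrative sketch, not part of the original header: 'entries' is a
 * flexible array member, so a branch_stack holding 'nr' records must be
 * allocated with the entries appended to the fixed fields. Function name
 * is hypothetical.
 */
static inline size_t branch_stack__size_example(u64 nr)
{
	/* fixed part ('nr' and 'hw_idx') plus nr variable-length entries */
	return sizeof(struct branch_stack) + nr * sizeof(struct branch_entry);
}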

/*
 * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
 * Otherwise, the output format of a sample with a branch stack is
 * struct branch_stack {
 *	u64 nr;
 *	struct branch_entry entries[0];
 * }
 * Check whether the hw_idx is available and return the corresponding
 * pointer to entries[0].
 */
static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
{
	u64 *entry = (u64 *)sample->branch_stack;

	/* skip the 'nr' word */
	entry++;
	if (sample->no_hw_idx)
		return (struct branch_entry *)entry;
	/* also skip the 'hw_idx' word */
	return (struct branch_entry *)(++entry);
}
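
/*
 * Usage sketch, illustrative only: walk the branch entries of a parsed
 * sample via the helper above, which hides whether the optional hw_idx
 * word is present. Function name is hypothetical.
 */
static inline void branch_entries__print_example(struct perf_sample *sample)
{
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	u64 i;

	for (i = 0; i < sample->branch_stack->nr; i++)
		printf("%#llx -> %#llx (mispred=%u)\n",
		       (unsigned long long)entries[i].from,
		       (unsigned long long)entries[i].to,
		       (unsigned int)entries[i].flags.mispred);
}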

struct branch_type_stat {
	bool branch_to;
	u64 counts[PERF_BR_MAX];
	u64 new_counts[PERF_BR_NEW_MAX];
	u64 cond_fwd;
	u64 cond_bwd;
	u64 cross_4k;
	u64 cross_2m;
};

void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
		       u64 from, u64 to);

const char *branch_type_name(int type);
const char *branch_new_type_name(int new_type);
const char *get_branch_type(struct branch_entry *e);

void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
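
/*
 * Usage sketch, illustrative only: accumulate per-type statistics over a
 * decoded run of branch entries and print them with the display helper.
 * Function name is hypothetical.
 */
static inline void branch_type_stat__example(FILE *fp,
					     struct branch_entry *entries,
					     u64 nr)
{
	struct branch_type_stat st = { .branch_to = false };
	u64 i;

	for (i = 0; i < nr; i++)
		branch_type_count(&st, &entries[i].flags,
				  entries[i].from, entries[i].to);
	branch_type_stat_display(fp, &st);
}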

const char *branch_spec_desc(int spec);

#endif /* _PERF_BRANCH_H */