perf tools fixes for v5.18: 4th batch

- Fix Intel PT (Processor Trace) timeless decoding with perf.data directory.
 
 - ARM SPE (Statistical Profiling Extensions) address fixes, for synthesized
   events and for SPE events with physical addresses.  Add a simple 'perf test'
   entry to make sure this doesn't regress.
 
 - Remove arch specific processing of kallsyms data to fixup symbol end address,
   fixing excessive memory consumption in the annotation code.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYmvfpwAKCRCyPKLppCJ+
 JysNAQDtEIvGuRtjANnFqDQqyhrffvAg5BFkLg1HDYAttdsT0AD/bveO3Be5AoVH
 ocyoL9W5qoGo0pgxS5qfB13o5bvhwAE=
 =UlpT
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix Intel PT (Processor Trace) timeless decoding with perf.data
   directory.

 - ARM SPE (Statistical Profiling Extensions) address fixes, for
   synthesized events and for SPE events with physical addresses. Add a
   simple 'perf test' entry to make sure this doesn't regress.

 - Remove arch specific processing of kallsyms data to fixup symbol end
   address, fixing excessive memory consumption in the annotation code.

* tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf symbol: Remove arch__symbols__fixup_end()
  perf symbol: Update symbols__fixup_end()
  perf symbol: Pass is_kallsyms to symbols__fixup_end()
  perf test: Add perf_event_attr test for Arm SPE
  perf arm-spe: Fix SPE events with phys addresses
  perf arm-spe: Fix addresses of synthesized SPE events
  perf intel-pt: Fix timeless decoding with perf.data directory
This commit is contained in:
Linus Torvalds 2022-04-29 11:34:07 -07:00
commit 3e71713c9e
12 changed files with 56 additions and 79 deletions

View File

@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
u64 bit;
sper->evlist = evlist;
@ -245,6 +246,15 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
/*
* The PHYS_ADDR flag does not affect the driver behaviour; it is only used
* to indicate that the SPE samples in the resulting output contain physical
* addresses where applicable.
*/
bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
if (arm_spe_evsel->core.attr.config & bit)
evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)

View File

@ -8,27 +8,6 @@
#include "callchain.h"
#include "record.h"
/*
 * On arm64, the kernel text segment starts at a high memory address,
 * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
 * address, like 0xffff 0000 00ax xxxx. When only a small amount of
 * memory is used by modules, the gap between the end of a module's text
 * segment and the start of the kernel text segment may reach 2G.
 * Therefore do not fill this gap and do not assign it to the kernel dso map.
 */
/* Amount added to a symbol's end when it sits on a kernel/module boundary. */
#define SYMBOL_LIMIT (1 << 12) /* 4K */
/*
 * arm64 version of the symbol end fixup for @p, given the symbol @c whose
 * start address may become @p's end.
 *
 * A '[' in a symbol name is treated as the module marker (presumably the
 * "[module]" suffix found in kallsyms lines - confirm against the loader).
 * When exactly one of @p and @c carries the marker, @p->end is advanced by
 * SYMBOL_LIMIT rather than being stretched to @c->start; in every other
 * case @p->end is set to @c->start. The resulting end is logged with
 * pr_debug4().
 */
void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
{
/* True when only one of the two symbols has the '[' marker in its name */
if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
(strchr(p->name, '[') == NULL && strchr(c->name, '[')))
/* Limit range of last symbol in module and kernel */
p->end += SYMBOL_LIMIT;
else
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;

View File

@ -1,5 +1,4 @@
perf-y += header.o
perf-y += machine.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o

View File

@ -1,25 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <internal/lib.h> // page_size
#include "debug.h"
#include "symbol.h"
/*
 * On powerpc, the kernel text segment starts at memory address
 * 0xc000000000000000, whereas the modules are located at very high memory
 * addresses, for example 0xc00800000xxxxxxx. The gap between the end of the
 * kernel text segment and the beginning of the first module's text segment
 * is very large.
 * Therefore do not fill this gap and do not assign it to the kernel dso map.
 *
 * powerpc version of the symbol end fixup for @p, given the symbol @c whose
 * start address may become @p's end. A '[' in a symbol name is treated as
 * the module marker (presumably the "[module]" suffix found in kallsyms
 * lines - confirm against the loader). Only the transition from an unmarked
 * @p to a marked @c is special-cased: @p->end is then advanced by page_size.
 * In every other case, including a marked @p followed by an unmarked @c,
 * @p->end is set to @c->start. The resulting end is logged with pr_debug4().
 */
void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
{
/* @p has no '[' marker but @c does */
if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
/* Limit the range of last kernel symbol */
p->end += page_size;
else
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}

View File

@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
return 0;
}
/*
 * On s390, the kernel text segment start is located at very low memory
 * addresses, for example 0x10000. Modules are located at very high memory
 * addresses, for example 0x3ff xxxx xxxx. The gap between the end of the
 * kernel text segment and the beginning of the first module's text segment
 * is very big.
 * Therefore do not fill this gap and do not assign it to the kernel dso map.
 *
 * s390 version of the symbol end fixup for @p, given the symbol @c whose
 * start address may become @p's end. A '[' in a symbol name is treated as
 * the module marker (presumably the "[module]" suffix found in kallsyms
 * lines - confirm against the loader). Only the transition from an unmarked
 * @p to a marked @c is special-cased: @p->end is then rounded up to a
 * multiple of page_size. In every other case @p->end is set to @c->start.
 * The resulting end is logged with pr_debug4().
 */
void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
{
/* @p has no '[' marker but @c does */
if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
/* Last kernel symbol mapped to end of page */
p->end = roundup(p->end, page_size);
else
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}

View File

@ -60,6 +60,7 @@ Following tests are defined (with perf commands):
perf record -R kill (test-record-raw)
perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period)
perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term)
perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
perf stat -d kill (test-stat-detailed-1)

View File

@ -0,0 +1,12 @@
[config]
command = record
args = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1
ret = 1
arch = aarch64
[event-10:base-record-spe]
# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR
sample_type=622727
# dummy event
[event-1:base-record-spe]

View File

@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type = evsel->core.attr.sample_type &
(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
PERF_SAMPLE_WEIGHT;
PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else

View File

@ -2576,7 +2576,7 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
if (perf_data__is_dir(session->data))
if (perf_data__is_dir(session->data) && session->data->dir.nr)
return __perf_session__process_dir_events(session);
return __perf_session__process_events(session);

View File

@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
symbols__fixup_end(&dso->symbols);
symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*

View File

@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
/*
 * Weak default for the per-architecture symbol end fixup: unconditionally
 * makes symbol @p end where symbol @c starts. Being __weak, it is the
 * definition used unless another definition of the same function is linked
 * in.
 */
void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
{
p->end = c->start;
}
const char * __weak arch__normalize_symbol_name(const char *name)
{
return name;
@ -217,7 +212,8 @@ again:
}
}
void symbols__fixup_end(struct rb_root_cached *symbols)
/* Update zero-sized symbols using the address of the next symbol */
void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
{
struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
if (prev->end == prev->start || prev->end != curr->start)
arch__symbols__fixup_end(prev, curr);
/*
* On some architectures, the kernel text segment starts at a low
* memory address while modules are located at high memory
* addresses (or vice versa). The gap between the end of the
* kernel text segment and the beginning of the first module's
* text segment is very big. Therefore do not fill this gap and
* do not assign it to the kernel dso map (kallsyms).
*
* In kallsyms, module symbols are identified by the '[' character
* that introduces the module name, as in:
* ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
*/
if (prev->end == prev->start) {
/* Last kernel/module symbol mapped to end of page */
if (is_kallsyms && (!strchr(prev->name, '[') !=
!strchr(curr->name, '[')))
prev->end = roundup(prev->end + 4096, 4096);
else
prev->end = curr->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
__func__, prev->name, prev->end);
}
}
/* Last entry */
@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
if (kallsyms__delta(kmap, filename, &delta))
return -1;
symbols__fixup_end(&dso->symbols);
symbols__fixup_end(&dso->symbols, true);
symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
#undef bfd_asymbol_section
#endif
symbols__fixup_end(&dso->symbols);
symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
dso->adjust_symbols = 1;

View File

@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
bool kernel);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
void symbols__fixup_end(struct rb_root_cached *symbols);
void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
void maps__fixup_end(struct maps *maps);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
#define SYMBOL_A 0
#define SYMBOL_B 1
void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
int arch__compare_symbol_names(const char *namea, const char *nameb);
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n);