mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-16 02:44:26 +08:00
e345997914
When AUX area events are used in sampling mode, they must be the group leader, but the group leader is also used for leader-sampling. However, it is not desirable to use an AUX area event as the leader for leader-sampling, because it doesn't have any samples of its own. To support leader-sampling with AUX area events, use the 2nd event of the group as the "leader" for the purposes of leader-sampling. Example: # perf record --kcore --aux-sample -e '{intel_pt//,cycles,instructions}:S' -c 10000 uname [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.786 MB perf.data ] # perf report Samples: 380 of events 'anon group { cycles, instructions }', Event count (approx.): 3026164 Children Self Command Shared Object Symbol + 38.76% 42.65% 0.00% 0.00% uname [kernel.kallsyms] [k] __x86_indirect_thunk_rax + 35.82% 31.33% 0.00% 0.00% uname ld-2.28.so [.] _dl_start_user + 34.29% 29.74% 0.55% 0.47% uname ld-2.28.so [.] _dl_start + 33.73% 28.62% 1.60% 0.97% uname ld-2.28.so [.] dl_main + 33.19% 29.04% 0.52% 0.32% uname ld-2.28.so [.] _dl_sysdep_start + 27.83% 33.74% 0.00% 0.00% uname [kernel.kallsyms] [k] do_syscall_64 + 26.76% 33.29% 0.00% 0.00% uname [kernel.kallsyms] [k] entry_SYSCALL_64_after_hwframe + 23.78% 20.33% 5.97% 5.25% uname [kernel.kallsyms] [k] page_fault + 23.18% 24.60% 0.00% 0.00% uname libc-2.28.so [.] __libc_start_main + 22.64% 24.37% 0.00% 0.00% uname uname [.] _start + 21.04% 23.27% 0.00% 0.00% uname uname [.] main + 19.48% 18.08% 3.72% 3.64% uname ld-2.28.so [.] _dl_relocate_object + 19.47% 21.81% 0.00% 0.00% uname libc-2.28.so [.] setlocale + 19.44% 21.56% 0.52% 0.61% uname libc-2.28.so [.] _nl_find_locale + 17.87% 19.66% 0.00% 0.00% uname libc-2.28.so [.] 
_nl_load_locale_from_archive + 15.71% 13.73% 0.53% 0.52% uname [kernel.kallsyms] [k] do_page_fault + 15.18% 13.21% 1.03% 0.68% uname [kernel.kallsyms] [k] handle_mm_fault + 14.15% 12.53% 1.01% 1.12% uname [kernel.kallsyms] [k] __handle_mm_fault + 12.03% 9.67% 0.54% 0.32% uname ld-2.28.so [.] _dl_map_object + 10.55% 8.48% 0.00% 0.00% uname ld-2.28.so [.] openaux + 10.55% 20.20% 0.52% 0.61% uname libc-2.28.so [.] __run_exit_handlers Committer notes: Fixed up this problem: util/record.c: In function ‘perf_evlist__config’: util/record.c:256:3: error: too few arguments to function ‘perf_evsel__config_leader_sampling’ 256 | perf_evsel__config_leader_sampling(evsel); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ util/record.c:190:13: note: declared here 190 | static void perf_evsel__config_leader_sampling(struct evsel *evsel, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lore.kernel.org/lkml/20200401101613.6201-17-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
424 lines
9.8 KiB
C
424 lines
9.8 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include "debug.h"
|
|
#include "evlist.h"
|
|
#include "evsel.h"
|
|
#include "parse-events.h"
|
|
#include <errno.h>
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <api/fs/fs.h>
|
|
#include <subcmd/parse-options.h>
|
|
#include <perf/cpumap.h>
|
|
#include "cloexec.h"
|
|
#include "record.h"
|
|
#include "../perf-sys.h"
|
|
|
|
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
|
|
|
|
/*
 * Probe kernel support for an attribute tweak: open a trial event on @cpu,
 * let @fn modify its attributes, then re-open to see whether the kernel
 * accepts the modified attribute.
 *
 * Returns 0 on success, -EINVAL if the kernel rejects the tweaked attribute,
 * -ENOMEM on allocation failure, and -EAGAIN for any other open failure so
 * the caller can retry with a different trial event.
 */
static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
{
	struct evlist *evlist;
	struct evsel *evsel;
	unsigned long flags = perf_event_open_cloexec_flag();
	int err = -EAGAIN, fd;
	static pid_t pid = -1;	/* sticky: remember the EACCES fallback */

	evlist = evlist__new();
	if (!evlist)
		return -ENOMEM;

	if (parse_events(evlist, str, NULL))
		goto out_delete;

	evsel = evlist__first(evlist);

	for (;;) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
		if (fd >= 0)
			break;
		/* No permission for system-wide? Fall back to self-monitoring. */
		if (pid == -1 && errno == EACCES) {
			pid = 0;
			continue;
		}
		goto out_delete;
	}
	close(fd);

	/* Apply the caller's attribute tweak, then test it on the kernel. */
	fn(evsel);

	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	if (fd < 0) {
		if (errno == EINVAL)
			err = -EINVAL;
		goto out_delete;
	}
	close(fd);
	err = 0;

out_delete:
	evlist__delete(evlist);
	return err;
}
|
|
|
|
/*
 * Run the probe callback @fn against a list of candidate trial events on the
 * first online CPU. Returns true as soon as one trial succeeds; keeps trying
 * the next candidate only while the failure was -EAGAIN (event unavailable).
 */
static bool perf_probe_api(setup_probe_fn_t fn)
{
	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
	struct perf_cpu_map *cpus;
	int cpu, ret, i;

	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	for (i = 0; try[i]; i++) {
		ret = perf_do_probe_api(fn, cpu, try[i]);
		if (!ret)
			return true;
		if (ret != -EAGAIN)
			break;
	}

	return false;
}
|
|
|
|
static void perf_probe_sample_identifier(struct evsel *evsel)
|
|
{
|
|
evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
|
|
}
|
|
|
|
static void perf_probe_comm_exec(struct evsel *evsel)
|
|
{
|
|
evsel->core.attr.comm_exec = 1;
|
|
}
|
|
|
|
static void perf_probe_context_switch(struct evsel *evsel)
|
|
{
|
|
evsel->core.attr.context_switch = 1;
|
|
}
|
|
|
|
bool perf_can_sample_identifier(void)
|
|
{
|
|
return perf_probe_api(perf_probe_sample_identifier);
|
|
}
|
|
|
|
static bool perf_can_comm_exec(void)
|
|
{
|
|
return perf_probe_api(perf_probe_comm_exec);
|
|
}
|
|
|
|
bool perf_can_record_switch_events(void)
|
|
{
|
|
return perf_probe_api(perf_probe_context_switch);
|
|
}
|
|
|
|
bool perf_can_record_cpu_wide(void)
|
|
{
|
|
struct perf_event_attr attr = {
|
|
.type = PERF_TYPE_SOFTWARE,
|
|
.config = PERF_COUNT_SW_CPU_CLOCK,
|
|
.exclude_kernel = 1,
|
|
};
|
|
struct perf_cpu_map *cpus;
|
|
int cpu, fd;
|
|
|
|
cpus = perf_cpu_map__new(NULL);
|
|
if (!cpus)
|
|
return false;
|
|
cpu = cpus->map[0];
|
|
perf_cpu_map__put(cpus);
|
|
|
|
fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
|
|
if (fd < 0)
|
|
return false;
|
|
close(fd);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Architectures are expected to know if AUX area sampling is supported by the
|
|
* hardware. Here we check for kernel support.
|
|
*/
|
|
bool perf_can_aux_sample(void)
|
|
{
|
|
struct perf_event_attr attr = {
|
|
.size = sizeof(struct perf_event_attr),
|
|
.exclude_kernel = 1,
|
|
/*
|
|
* Non-zero value causes the kernel to calculate the effective
|
|
* attribute size up to that byte.
|
|
*/
|
|
.aux_sample_size = 1,
|
|
};
|
|
int fd;
|
|
|
|
fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
|
|
/*
|
|
* If the kernel attribute is big enough to contain aux_sample_size
|
|
* then we assume that it is supported. We are relying on the kernel to
|
|
* validate the attribute size before anything else that could be wrong.
|
|
*/
|
|
if (fd < 0 && errno == E2BIG)
|
|
return false;
|
|
if (fd >= 0)
|
|
close(fd);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * perf_evsel__config_leader_sampling() uses special rules for leader sampling.
 * However, if the leader is an AUX area event, then assume the event to sample
 * is the next event.
 */
static struct evsel *perf_evsel__read_sampler(struct evsel *evsel,
					      struct evlist *evlist)
{
	struct evsel *leader = evsel->leader;

	if (perf_evsel__is_aux_event(leader)) {
		/*
		 * An AUX area event has no samples of its own, so the first
		 * group member that is not the leader acts as the sampler.
		 */
		evlist__for_each_entry(evlist, evsel) {
			if (evsel->leader == leader && evsel != evsel->leader)
				return evsel;
		}
	}

	/* Normal case: the group leader is the sampling event. */
	return leader;
}
|
|
|
|
/*
 * Configure @evsel for leader sampling: when a group reads counts via its
 * leader (sample_read), only one event "leads" the sampling; every other
 * member has its own sampling disabled and inherits the sampler's
 * sample_type so its values can be synthesized from the leader's samples.
 */
static void perf_evsel__config_leader_sampling(struct evsel *evsel,
					       struct evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel->leader;
	struct evsel *read_sampler;

	/* Leader sampling only applies to groups that read via the leader. */
	if (!leader->sample_read)
		return;

	read_sampler = perf_evsel__read_sampler(evsel, evlist);

	/* The event that leads the sampling keeps its own configuration. */
	if (evsel == read_sampler)
		return;

	/*
	 * Disable sampling for all group members other than the leader in
	 * case the leader 'leads' the sampling, except when the leader is an
	 * AUX area event, in which case the 2nd event in the group is the one
	 * that 'leads' the sampling.
	 */
	attr->freq = 0;
	attr->sample_freq = 0;
	attr->sample_period = 0;
	attr->write_backward = 0;

	/*
	 * We don't get a sample for slave events, we make them when delivering
	 * the group leader sample. Set the slave event to follow the master
	 * sample_type to ease up reporting.
	 * An AUX area event also has sample_type requirements, so also include
	 * the sample type bits from the leader's sample_type to cover that
	 * case.
	 */
	attr->sample_type = read_sampler->core.attr.sample_type |
			    leader->core.attr.sample_type;
}
|
|
|
|
/*
 * Configure all events in @evlist for recording: set group leaders, apply
 * per-event configuration, set up leader sampling, and decide whether/how
 * sample IDs are needed to match samples back to events.
 */
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain)
{
	struct evsel *evsel;
	bool use_sample_identifier = false;
	bool use_comm_exec;
	bool sample_id = opts->sample_id;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	/* A first map entry < 0 means no CPU list, i.e. per-thread mode. */
	if (evlist->core.cpus->map[0] < 0)
		opts->no_inherit = true;

	use_comm_exec = perf_can_comm_exec();

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__config(evsel, opts, callchain);
		/* Tracking events carry comm_exec when the kernel supports it. */
		if (evsel->tracking && use_comm_exec)
			evsel->core.attr.comm_exec = 1;
	}

	/* Configure leader sampling here now that the sample type is known */
	evlist__for_each_entry(evlist, evsel)
		perf_evsel__config_leader_sampling(evsel, evlist);

	if (opts->full_auxtrace) {
		/*
		 * Need to be able to synthesize and parse selected events with
		 * arbitrary sample types, which requires always being able to
		 * match the id.
		 */
		use_sample_identifier = perf_can_sample_identifier();
		sample_id = true;
	} else if (evlist->core.nr_entries > 1) {
		struct evsel *first = evlist__first(evlist);

		/*
		 * With mixed sample types, PERF_SAMPLE_IDENTIFIER (if the
		 * kernel supports it) is needed to tell samples apart.
		 */
		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
				continue;
			use_sample_identifier = perf_can_sample_identifier();
			break;
		}
		sample_id = true;
	}

	if (sample_id) {
		evlist__for_each_entry(evlist, evsel)
			perf_evsel__set_sample_id(evsel, use_sample_identifier);
	}

	perf_evlist__set_id_pos(evlist);
}
|
|
|
|
/*
 * Read kernel.perf_event_max_sample_rate into *rate.
 * Returns 0 on success, non-zero on failure to read the sysctl.
 */
static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}
|
|
|
|
/*
 * Resolve the sampling frequency/period for recording: apply user overrides,
 * enforce that exactly one of period/frequency is in effect, and clamp the
 * frequency to the kernel's perf_event_max_sample_rate limit.
 * Returns 0 on success, -1 on a fatal configuration error.
 */
static int record_opts__config_freq(struct record_opts *opts)
{
	bool user_freq = opts->user_freq != UINT_MAX;
	unsigned int max_rate;

	/* User-specified values override the built-in defaults. */
	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (user_freq)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		pr_err("frequency and count are zero, aborting\n");
		return -1;
	}

	/* If the sysctl cannot be read, skip the clamping below. */
	if (get_max_rate(&max_rate))
		return 0;

	/*
	 * User specified frequency is over current maximum.
	 */
	if (user_freq && (max_rate < opts->freq)) {
		if (opts->strict_freq) {
			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
			       "       Please use -F freq option with a lower value or consider\n"
			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
			       max_rate);
			return -1;
		} else {
			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
				   "         The kernel will lower it when perf's interrupts take too long.\n"
				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
				   max_rate, opts->freq, max_rate);

			opts->freq = max_rate;
		}
	}

	/*
	 * Default frequency is over current maximum.
	 */
	if (max_rate < opts->freq) {
		pr_warning("Lowering default frequency rate to %u.\n"
			   "Please consider tweaking "
			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
			   max_rate);
		opts->freq = max_rate;
	}

	return 0;
}
|
|
|
|
/*
 * Finalize record options before opening events.
 * Currently this only resolves the sampling frequency/period settings.
 */
int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}
|
|
|
|
/*
 * Check whether the event described by @str can actually be opened, by
 * parsing it into a temporary evlist and test-opening it on one CPU.
 * @evlist (which may be NULL) only supplies the CPU to try; the probed
 * event itself comes from @str.
 */
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
{
	struct evlist *temp_evlist;
	struct evsel *evsel;
	int err, fd, cpu;
	bool ret = false;
	pid_t pid = -1;

	temp_evlist = evlist__new();
	if (!temp_evlist)
		return false;

	err = parse_events(temp_evlist, str, NULL);
	if (err)
		goto out_delete;

	/* @str may describe several events; probe the last one parsed. */
	evsel = evlist__last(temp_evlist);

	if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
		/* No CPU list to borrow; fall back to the first online CPU. */
		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);

		cpu = cpus ? cpus->map[0] : 0;
		perf_cpu_map__put(cpus);
	} else {
		cpu = evlist->core.cpus->map[0];
	}

	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
					 perf_event_open_cloexec_flag());
		if (fd < 0) {
			/* No system-wide permission? Retry self-monitoring. */
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);
	ret = true;

out_delete:
	evlist__delete(temp_evlist);
	return ret;
}
|
|
|
|
/*
 * Option callback for -F/--freq: parse @str into opts->user_freq.
 * "max" (case-insensitive) selects the kernel's current
 * perf_event_max_sample_rate; otherwise the string is parsed as a number
 * (note: atoi() yields 0 for non-numeric input, which is rejected later
 * by record_opts__config_freq()).
 * Returns 0 on success, -EINVAL/-1 on error.
 */
int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		/* %'u, not %'d: freq is unsigned (format/arg mismatch is UB) */
		pr_info("info: Using a maximum frequency rate of %'u Hz\n", freq);
	} else {
		freq = atoi(str);
	}

	opts->user_freq = freq;
	return 0;
}
|