mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-13 23:24:05 +08:00
19007a67a6
This brings support for raw syscall events in perfcounters. The arguments or exit value are saved as a raw sample using the PERF_SAMPLE_RAW attribute in a perf counter. Example (for now you must explicitly set the PERF_SAMPLE_RAW flag in perf record): perf record -e syscalls:sys_enter_open -f -F 1 -a perf report -D 0x2cbb8 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 02 00 50 00 20 e9 39 ab 0a 7f 00 00 ......P. .9.... . 0010: bc 14 00 00 bc 14 00 00 01 00 00 00 00 00 00 00 ............... . 0020: 2c 00 00 00 15 01 01 00 bc 14 00 00 bc 14 00 00 ,.............. ^ ^ ^ ^ ^ ^ ^ .......................... Event Size struct trace_entry . 0030: 00 00 00 00 46 98 43 02 00 00 00 00 80 08 00 00 ....F.C........ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ptr to file name open flags . 0040: 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 ............... ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ . open mode padding 0x2cbb8 [0x50]: PERF_EVENT_SAMPLE (IP, 2): 5308: 0x7f0aab39e920 period: 1 Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: Jiaying Zhang <jiayingz@google.com> Cc: Martin Bligh <mbligh@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Masami Hiramatsu <mhiramat@redhat.com>
457 lines
10 KiB
C
457 lines
10 KiB
C
#include <trace/syscall.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/perf_counter.h>
|
|
#include <asm/syscall.h>
|
|
|
|
#include "trace_output.h"
|
|
#include "trace.h"
|
|
|
|
/*
 * State for the ftrace syscall events.  The reg/unreg helpers in this file
 * take syscall_trace_lock around every update of the counters and bitmaps.
 */
static DEFINE_MUTEX(syscall_trace_lock);

/* One bit per syscall number, set while that syscall's event is enabled. */
static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);

/* Successful registrations that have not been undone yet. */
static int sys_refcount_enter;
static int sys_refcount_exit;
|
|
|
|
/* Tracer option bits: the single flag selects printing of parameter types. */
enum {
	TRACE_SYSCALLS_OPT_TYPES = 1 << 0,
};
|
|
|
|
static struct tracer_opt syscalls_opts[] = {
|
|
{ TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
|
|
{ }
|
|
};
|
|
|
|
static struct tracer_flags syscalls_flags = {
|
|
.val = 0, /* By default: no parameters types */
|
|
.opts = syscalls_opts
|
|
};
|
|
|
|
enum print_line_t
|
|
print_syscall_enter(struct trace_iterator *iter, int flags)
|
|
{
|
|
struct trace_seq *s = &iter->seq;
|
|
struct trace_entry *ent = iter->ent;
|
|
struct syscall_trace_enter *trace;
|
|
struct syscall_metadata *entry;
|
|
int i, ret, syscall;
|
|
|
|
trace = (typeof(trace))ent;
|
|
syscall = trace->nr;
|
|
entry = syscall_nr_to_meta(syscall);
|
|
|
|
if (!entry)
|
|
goto end;
|
|
|
|
if (entry->enter_id != ent->type) {
|
|
WARN_ON_ONCE(1);
|
|
goto end;
|
|
}
|
|
|
|
ret = trace_seq_printf(s, "%s(", entry->name);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
/* parameter types */
|
|
if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
|
|
ret = trace_seq_printf(s, "%s ", entry->types[i]);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
}
|
|
/* parameter values */
|
|
ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
|
|
trace->args[i],
|
|
i == entry->nb_args - 1 ? ")" : ",");
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
}
|
|
|
|
end:
|
|
trace_seq_printf(s, "\n");
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
enum print_line_t
|
|
print_syscall_exit(struct trace_iterator *iter, int flags)
|
|
{
|
|
struct trace_seq *s = &iter->seq;
|
|
struct trace_entry *ent = iter->ent;
|
|
struct syscall_trace_exit *trace;
|
|
int syscall;
|
|
struct syscall_metadata *entry;
|
|
int ret;
|
|
|
|
trace = (typeof(trace))ent;
|
|
syscall = trace->nr;
|
|
entry = syscall_nr_to_meta(syscall);
|
|
|
|
if (!entry) {
|
|
trace_seq_printf(s, "\n");
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
if (entry->exit_id != ent->type) {
|
|
WARN_ON_ONCE(1);
|
|
return TRACE_TYPE_UNHANDLED;
|
|
}
|
|
|
|
ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
|
|
trace->ret);
|
|
if (!ret)
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
return TRACE_TYPE_HANDLED;
|
|
}
|
|
|
|
int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s)
|
|
{
|
|
int i;
|
|
int nr;
|
|
int ret = 0;
|
|
struct syscall_metadata *entry;
|
|
int offset = sizeof(struct trace_entry);
|
|
|
|
nr = syscall_name_to_nr((char *)call->data);
|
|
entry = syscall_nr_to_meta(nr);
|
|
|
|
if (!entry)
|
|
return ret;
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
|
|
entry->args[i]);
|
|
if (!ret)
|
|
return 0;
|
|
ret = trace_seq_printf(s, "\toffset:%d;\tsize:%lu;\n", offset,
|
|
sizeof(unsigned long));
|
|
if (!ret)
|
|
return 0;
|
|
offset += sizeof(unsigned long);
|
|
}
|
|
|
|
trace_seq_printf(s, "\nprint fmt: \"");
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
ret = trace_seq_printf(s, "%s: 0x%%0%lulx%s", entry->args[i],
|
|
sizeof(unsigned long),
|
|
i == entry->nb_args - 1 ? "\", " : ", ");
|
|
if (!ret)
|
|
return 0;
|
|
}
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
ret = trace_seq_printf(s, "((unsigned long)(REC->%s))%s",
|
|
entry->args[i],
|
|
i == entry->nb_args - 1 ? "\n" : ", ");
|
|
if (!ret)
|
|
return 0;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void ftrace_syscall_enter(struct pt_regs *regs, long id)
|
|
{
|
|
struct syscall_trace_enter *entry;
|
|
struct syscall_metadata *sys_data;
|
|
struct ring_buffer_event *event;
|
|
int size;
|
|
int syscall_nr;
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
if (!test_bit(syscall_nr, enabled_enter_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
|
|
|
|
event = trace_current_buffer_lock_reserve(sys_data->enter_id, size,
|
|
0, 0);
|
|
if (!event)
|
|
return;
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
entry->nr = syscall_nr;
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
|
|
|
|
trace_current_buffer_unlock_commit(event, 0, 0);
|
|
trace_wake_up();
|
|
}
|
|
|
|
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
|
|
{
|
|
struct syscall_trace_exit *entry;
|
|
struct syscall_metadata *sys_data;
|
|
struct ring_buffer_event *event;
|
|
int syscall_nr;
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
if (!test_bit(syscall_nr, enabled_exit_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
event = trace_current_buffer_lock_reserve(sys_data->exit_id,
|
|
sizeof(*entry), 0, 0);
|
|
if (!event)
|
|
return;
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
entry->nr = syscall_nr;
|
|
entry->ret = syscall_get_return_value(current, regs);
|
|
|
|
trace_current_buffer_unlock_commit(event, 0, 0);
|
|
trace_wake_up();
|
|
}
|
|
|
|
int reg_event_syscall_enter(void *ptr)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
char *name;
|
|
|
|
name = (char *)ptr;
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return -ENOSYS;
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_refcount_enter)
|
|
ret = register_trace_syscall_enter(ftrace_syscall_enter);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall entry trace point");
|
|
} else {
|
|
set_bit(num, enabled_enter_syscalls);
|
|
sys_refcount_enter++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
void unreg_event_syscall_enter(void *ptr)
|
|
{
|
|
int num;
|
|
char *name;
|
|
|
|
name = (char *)ptr;
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return;
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_refcount_enter--;
|
|
clear_bit(num, enabled_enter_syscalls);
|
|
if (!sys_refcount_enter)
|
|
unregister_trace_syscall_enter(ftrace_syscall_enter);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
int reg_event_syscall_exit(void *ptr)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
char *name;
|
|
|
|
name = (char *)ptr;
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return -ENOSYS;
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_refcount_exit)
|
|
ret = register_trace_syscall_exit(ftrace_syscall_exit);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall exit trace point");
|
|
} else {
|
|
set_bit(num, enabled_exit_syscalls);
|
|
sys_refcount_exit++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
void unreg_event_syscall_exit(void *ptr)
|
|
{
|
|
int num;
|
|
char *name;
|
|
|
|
name = (char *)ptr;
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return;
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_refcount_exit--;
|
|
clear_bit(num, enabled_exit_syscalls);
|
|
if (!sys_refcount_exit)
|
|
unregister_trace_syscall_exit(ftrace_syscall_exit);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
struct trace_event event_syscall_enter = {
|
|
.trace = print_syscall_enter,
|
|
};
|
|
|
|
struct trace_event event_syscall_exit = {
|
|
.trace = print_syscall_exit,
|
|
};
|
|
|
|
#ifdef CONFIG_EVENT_PROFILE
|
|
|
|
struct syscall_enter_record {
|
|
struct trace_entry entry;
|
|
unsigned long args[0];
|
|
};
|
|
|
|
struct syscall_exit_record {
|
|
struct trace_entry entry;
|
|
unsigned long ret;
|
|
};
|
|
|
|
/*
 * Profiling counterparts of the tracing state: one bit per syscall number
 * and a count of successful registrations, for entry and for exit.  The
 * reg/unreg_prof helpers update them while holding syscall_trace_lock.
 */
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;
static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX);
|
|
|
|
static void prof_syscall_enter(struct pt_regs *regs, long id)
|
|
{
|
|
struct syscall_enter_record *rec;
|
|
struct syscall_metadata *sys_data;
|
|
int syscall_nr;
|
|
int size;
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
/* get the size after alignment with the u32 buffer size field */
|
|
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
|
|
size = ALIGN(size + sizeof(u32), sizeof(u64));
|
|
size -= sizeof(u32);
|
|
|
|
do {
|
|
char raw_data[size];
|
|
|
|
/* zero the dead bytes from align to not leak stack to user */
|
|
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
|
|
|
|
rec = (struct syscall_enter_record *) raw_data;
|
|
tracing_generic_entry_update(&rec->entry, 0, 0);
|
|
rec->entry.type = sys_data->enter_id;
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
|
(unsigned long *)&rec->args);
|
|
perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
|
|
} while(0);
|
|
}
|
|
|
|
int reg_prof_syscall_enter(char *name)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return -ENOSYS;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_prof_refcount_enter)
|
|
ret = register_trace_syscall_enter(prof_syscall_enter);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall entry trace point");
|
|
} else {
|
|
set_bit(num, enabled_prof_enter_syscalls);
|
|
sys_prof_refcount_enter++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
void unreg_prof_syscall_enter(char *name)
|
|
{
|
|
int num;
|
|
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_prof_refcount_enter--;
|
|
clear_bit(num, enabled_prof_enter_syscalls);
|
|
if (!sys_prof_refcount_enter)
|
|
unregister_trace_syscall_enter(prof_syscall_enter);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
static void prof_syscall_exit(struct pt_regs *regs, long ret)
|
|
{
|
|
struct syscall_metadata *sys_data;
|
|
struct syscall_exit_record rec;
|
|
int syscall_nr;
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
|
|
return;
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
if (!sys_data)
|
|
return;
|
|
|
|
tracing_generic_entry_update(&rec.entry, 0, 0);
|
|
rec.entry.type = sys_data->exit_id;
|
|
rec.ret = syscall_get_return_value(current, regs);
|
|
|
|
perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
|
|
}
|
|
|
|
int reg_prof_syscall_exit(char *name)
|
|
{
|
|
int ret = 0;
|
|
int num;
|
|
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return -ENOSYS;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
if (!sys_prof_refcount_exit)
|
|
ret = register_trace_syscall_exit(prof_syscall_exit);
|
|
if (ret) {
|
|
pr_info("event trace: Could not activate"
|
|
"syscall entry trace point");
|
|
} else {
|
|
set_bit(num, enabled_prof_exit_syscalls);
|
|
sys_prof_refcount_exit++;
|
|
}
|
|
mutex_unlock(&syscall_trace_lock);
|
|
return ret;
|
|
}
|
|
|
|
void unreg_prof_syscall_exit(char *name)
|
|
{
|
|
int num;
|
|
|
|
num = syscall_name_to_nr(name);
|
|
if (num < 0 || num >= FTRACE_SYSCALL_MAX)
|
|
return;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
sys_prof_refcount_exit--;
|
|
clear_bit(num, enabled_prof_exit_syscalls);
|
|
if (!sys_prof_refcount_exit)
|
|
unregister_trace_syscall_exit(prof_syscall_exit);
|
|
mutex_unlock(&syscall_trace_lock);
|
|
}
|
|
|
|
#endif
|
|
|
|
|