linux/kernel/trace/trace_dynevent.c
Steven Rostedt (Google) 4313e5a613 tracing: Free buffers when a used dynamic event is removed
After 65536 dynamic events have been added and removed, the "type" field
of the event then uses the first type number that is available (not
currently used by other events). A type number is the identifier of the
binary blobs in the tracing ring buffer (known as events) to map them to
logic that can parse the binary blob.

The issue is that if a dynamic event (like a kprobe event) is traced and
is in the ring buffer, and then that event is removed (because it is
dynamic, which means it can be created and destroyed), if another dynamic
event is created that has the same number that new event's logic on
parsing the binary blob will be used.

To show how this can be an issue, the following can crash the kernel:

 # cd /sys/kernel/tracing
 # for i in `seq 65536`; do
     echo 'p:kprobes/foo do_sys_openat2 $arg1:u32' > kprobe_events
 # done

For every iteration of the above, the writing to the kprobe_events will
remove the old event and create a new one (with the same format) and
increase the type number to the next available on until the type number
reaches over 65535 which is the max number for the 16 bit type. After it
reaches that number, the logic to allocate a new number simply looks for
the next available number. When an dynamic event is removed, that number
is then available to be reused by the next dynamic event created. That is,
once the above reaches the max number, the number assigned to the event in
that loop will remain the same.

Now that means deleting one dynamic event and created another will reuse
the previous events type number. This is where bad things can happen.
After the above loop finishes, the kprobes/foo event which reads the
do_sys_openat2 function call's first parameter as an integer.

 # echo 1 > kprobes/foo/enable
 # cat /etc/passwd > /dev/null
 # cat trace
             cat-2211    [005] ....  2007.849603: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196
             cat-2211    [005] ....  2007.849620: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196
             cat-2211    [005] ....  2007.849838: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196
             cat-2211    [005] ....  2007.849880: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196
 # echo 0 > kprobes/foo/enable

Now if we delete the kprobe and create a new one that reads a string:

 # echo 'p:kprobes/foo do_sys_openat2 +0($arg2):string' > kprobe_events

And now we can the trace:

 # cat trace
        sendmail-1942    [002] .....   530.136320: foo: (do_sys_openat2+0x0/0x240) arg1=             cat-2046    [004] .....   530.930817: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������"
             cat-2046    [004] .....   530.930961: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������"
             cat-2046    [004] .....   530.934278: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������"
             cat-2046    [004] .....   530.934563: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������"
            bash-1515    [007] .....   534.299093: foo: (do_sys_openat2+0x0/0x240) arg1="kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk���������@��4Z����;Y�����U

And dmesg has:

==================================================================
BUG: KASAN: use-after-free in string+0xd4/0x1c0
Read of size 1 at addr ffff88805fdbbfa0 by task cat/2049

 CPU: 0 PID: 2049 Comm: cat Not tainted 6.1.0-rc6-test+ #641
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016
 Call Trace:
  <TASK>
  dump_stack_lvl+0x5b/0x77
  print_report+0x17f/0x47b
  kasan_report+0xad/0x130
  string+0xd4/0x1c0
  vsnprintf+0x500/0x840
  seq_buf_vprintf+0x62/0xc0
  trace_seq_printf+0x10e/0x1e0
  print_type_string+0x90/0xa0
  print_kprobe_event+0x16b/0x290
  print_trace_line+0x451/0x8e0
  s_show+0x72/0x1f0
  seq_read_iter+0x58e/0x750
  seq_read+0x115/0x160
  vfs_read+0x11d/0x460
  ksys_read+0xa9/0x130
  do_syscall_64+0x3a/0x90
  entry_SYSCALL_64_after_hwframe+0x63/0xcd
 RIP: 0033:0x7fc2e972ade2
 Code: c0 e9 b2 fe ff ff 50 48 8d 3d b2 3f 0a 00 e8 05 f0 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24
 RSP: 002b:00007ffc64e687c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fc2e972ade2
 RDX: 0000000000020000 RSI: 00007fc2e980d000 RDI: 0000000000000003
 RBP: 00007fc2e980d000 R08: 00007fc2e980c010 R09: 0000000000000000
 R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000020f00
 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
  </TASK>

 The buggy address belongs to the physical page:
 page:ffffea00017f6ec0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5fdbb
 flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff)
 raw: 000fffffc0000000 0000000000000000 ffffea00017f6ec8 0000000000000000
 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffff88805fdbbe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ffff88805fdbbf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 >ffff88805fdbbf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                                ^
  ffff88805fdbc000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ffff88805fdbc080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 ==================================================================

This was found when Zheng Yejian sent a patch to convert the event type
number assignment to use IDA, which gives the next available number, and
this bug showed up in the fuzz testing by Yujie Liu and the kernel test
robot. But after further analysis, I found that this behavior is the same
as when the event type numbers go past the 16bit max (and the above shows
that).

As modules have a similar issue, but is dealt with by setting a
"WAS_ENABLED" flag when a module event is enabled, and when the module is
freed, if any of its events were enabled, the ring buffer that holds that
event is also cleared, to prevent reading stale events. The same can be
done for dynamic events.

If any dynamic event that is being removed was enabled, then make sure the
buffers they were enabled in are now cleared.

Link: https://lkml.kernel.org/r/20221123171434.545706e3@gandalf.local.home
Link: https://lore.kernel.org/all/20221110020319.1259291-1-zhengyejian1@huawei.com/

Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Depends-on: e18eb8783e ("tracing: Add tracing_reset_all_online_cpus_unlocked() function")
Depends-on: 5448d44c38 ("tracing: Add unified dynamic event framework")
Depends-on: 6212dd2968 ("tracing/kprobes: Use dyn_event framework for kprobe events")
Depends-on: 065e63f951 ("tracing: Only have rmmod clear buffers that its events were active in")
Depends-on: 575380da8b ("tracing: Only clear trace buffer on module unload if event was traced")
Fixes: 77b44d1b7c ("tracing/kprobes: Rename Kprobe-tracer to kprobe-event")
Reported-by: Zheng Yejian <zhengyejian1@huawei.com>
Reported-by: Yujie Liu <yujie.liu@intel.com>
Reported-by: kernel test robot <yujie.liu@intel.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2022-11-23 19:07:12 -05:00

484 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Generic dynamic event control interface
*
* Copyright (C) 2018 Masami Hiramatsu <mhiramat@kernel.org>
*/
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/tracefs.h>
#include "trace.h"
#include "trace_output.h" /* for trace_event_sem */
#include "trace_dynevent.h"
static DEFINE_MUTEX(dyn_event_ops_mutex);
static LIST_HEAD(dyn_event_ops_list);
bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
{
struct trace_event_call *call;
bool ret = false;
if (WARN_ON_ONCE(!(dyn_call->flags & TRACE_EVENT_FL_DYNAMIC)))
return false;
down_read(&trace_event_sem);
list_for_each_entry(call, &ftrace_events, list) {
if (call == dyn_call) {
atomic_inc(&dyn_call->refcnt);
ret = true;
}
}
up_read(&trace_event_sem);
return ret;
}
void trace_event_dyn_put_ref(struct trace_event_call *call)
{
if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_DYNAMIC)))
return;
if (WARN_ON_ONCE(atomic_read(&call->refcnt) <= 0)) {
atomic_set(&call->refcnt, 0);
return;
}
atomic_dec(&call->refcnt);
}
bool trace_event_dyn_busy(struct trace_event_call *call)
{
return atomic_read(&call->refcnt) != 0;
}
int dyn_event_register(struct dyn_event_operations *ops)
{
if (!ops || !ops->create || !ops->show || !ops->is_busy ||
!ops->free || !ops->match)
return -EINVAL;
INIT_LIST_HEAD(&ops->list);
mutex_lock(&dyn_event_ops_mutex);
list_add_tail(&ops->list, &dyn_event_ops_list);
mutex_unlock(&dyn_event_ops_mutex);
return 0;
}
int dyn_event_release(const char *raw_command, struct dyn_event_operations *type)
{
struct dyn_event *pos, *n;
char *system = NULL, *event, *p;
int argc, ret = -ENOENT;
char **argv;
argv = argv_split(GFP_KERNEL, raw_command, &argc);
if (!argv)
return -ENOMEM;
if (argv[0][0] == '-') {
if (argv[0][1] != ':') {
ret = -EINVAL;
goto out;
}
event = &argv[0][2];
} else {
event = strchr(argv[0], ':');
if (!event) {
ret = -EINVAL;
goto out;
}
event++;
}
p = strchr(event, '/');
if (p) {
system = event;
event = p + 1;
*p = '\0';
}
if (!system && event[0] == '\0') {
ret = -EINVAL;
goto out;
}
mutex_lock(&event_mutex);
for_each_dyn_event_safe(pos, n) {
if (type && type != pos->ops)
continue;
if (!pos->ops->match(system, event,
argc - 1, (const char **)argv + 1, pos))
continue;
ret = pos->ops->free(pos);
if (ret)
break;
}
tracing_reset_all_online_cpus();
mutex_unlock(&event_mutex);
out:
argv_free(argv);
return ret;
}
static int create_dyn_event(const char *raw_command)
{
struct dyn_event_operations *ops;
int ret = -ENODEV;
if (raw_command[0] == '-' || raw_command[0] == '!')
return dyn_event_release(raw_command, NULL);
mutex_lock(&dyn_event_ops_mutex);
list_for_each_entry(ops, &dyn_event_ops_list, list) {
ret = ops->create(raw_command);
if (!ret || ret != -ECANCELED)
break;
}
mutex_unlock(&dyn_event_ops_mutex);
if (ret == -ECANCELED)
ret = -EINVAL;
return ret;
}
/* Protected by event_mutex */
LIST_HEAD(dyn_event_list);
void *dyn_event_seq_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&event_mutex);
return seq_list_start(&dyn_event_list, *pos);
}
void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
return seq_list_next(v, &dyn_event_list, pos);
}
void dyn_event_seq_stop(struct seq_file *m, void *v)
{
mutex_unlock(&event_mutex);
}
static int dyn_event_seq_show(struct seq_file *m, void *v)
{
struct dyn_event *ev = v;
if (ev && ev->ops)
return ev->ops->show(m, ev);
return 0;
}
static const struct seq_operations dyn_event_seq_op = {
.start = dyn_event_seq_start,
.next = dyn_event_seq_next,
.stop = dyn_event_seq_stop,
.show = dyn_event_seq_show
};
/*
* dyn_events_release_all - Release all specific events
* @type: the dyn_event_operations * which filters releasing events
*
* This releases all events which ->ops matches @type. If @type is NULL,
* all events are released.
* Return -EBUSY if any of them are in use, and return other errors when
* it failed to free the given event. Except for -EBUSY, event releasing
* process will be aborted at that point and there may be some other
* releasable events on the list.
*/
int dyn_events_release_all(struct dyn_event_operations *type)
{
struct dyn_event *ev, *tmp;
int ret = 0;
mutex_lock(&event_mutex);
for_each_dyn_event(ev) {
if (type && ev->ops != type)
continue;
if (ev->ops->is_busy(ev)) {
ret = -EBUSY;
goto out;
}
}
for_each_dyn_event_safe(ev, tmp) {
if (type && ev->ops != type)
continue;
ret = ev->ops->free(ev);
if (ret)
break;
}
out:
tracing_reset_all_online_cpus();
mutex_unlock(&event_mutex);
return ret;
}
static int dyn_event_open(struct inode *inode, struct file *file)
{
int ret;
ret = tracing_check_open_get_tr(NULL);
if (ret)
return ret;
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
ret = dyn_events_release_all(NULL);
if (ret < 0)
return ret;
}
return seq_open(file, &dyn_event_seq_op);
}
static ssize_t dyn_event_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
return trace_parse_run_command(file, buffer, count, ppos,
create_dyn_event);
}
static const struct file_operations dynamic_events_ops = {
.owner = THIS_MODULE,
.open = dyn_event_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.write = dyn_event_write,
};
/* Make a tracefs interface for controlling dynamic events */
static __init int init_dynamic_event(void)
{
int ret;
ret = tracing_init_dentry();
if (ret)
return 0;
trace_create_file("dynamic_events", TRACE_MODE_WRITE, NULL,
NULL, &dynamic_events_ops);
return 0;
}
fs_initcall(init_dynamic_event);
/**
* dynevent_arg_add - Add an arg to a dynevent_cmd
* @cmd: A pointer to the dynevent_cmd struct representing the new event cmd
* @arg: The argument to append to the current cmd
* @check_arg: An (optional) pointer to a function checking arg sanity
*
* Append an argument to a dynevent_cmd. The argument string will be
* appended to the current cmd string, followed by a separator, if
* applicable. Before the argument is added, the @check_arg function,
* if present, will be used to check the sanity of the current arg
* string.
*
* The cmd string and separator should be set using the
* dynevent_arg_init() before any arguments are added using this
* function.
*
* Return: 0 if successful, error otherwise.
*/
int dynevent_arg_add(struct dynevent_cmd *cmd,
struct dynevent_arg *arg,
dynevent_check_arg_fn_t check_arg)
{
int ret = 0;
if (check_arg) {
ret = check_arg(arg);
if (ret)
return ret;
}
ret = seq_buf_printf(&cmd->seq, " %s%c", arg->str, arg->separator);
if (ret) {
pr_err("String is too long: %s%c\n", arg->str, arg->separator);
return -E2BIG;
}
return ret;
}
/**
* dynevent_arg_pair_add - Add an arg pair to a dynevent_cmd
* @cmd: A pointer to the dynevent_cmd struct representing the new event cmd
* @arg_pair: The argument pair to append to the current cmd
* @check_arg: An (optional) pointer to a function checking arg sanity
*
* Append an argument pair to a dynevent_cmd. An argument pair
* consists of a left-hand-side argument and a right-hand-side
* argument separated by an operator, which can be whitespace, all
* followed by a separator, if applicable. This can be used to add
* arguments of the form 'type variable_name;' or 'x+y'.
*
* The lhs argument string will be appended to the current cmd string,
* followed by an operator, if applicable, followed by the rhs string,
* followed finally by a separator, if applicable. Before the
* argument is added, the @check_arg function, if present, will be
* used to check the sanity of the current arg strings.
*
* The cmd strings, operator, and separator should be set using the
* dynevent_arg_pair_init() before any arguments are added using this
* function.
*
* Return: 0 if successful, error otherwise.
*/
int dynevent_arg_pair_add(struct dynevent_cmd *cmd,
struct dynevent_arg_pair *arg_pair,
dynevent_check_arg_fn_t check_arg)
{
int ret = 0;
if (check_arg) {
ret = check_arg(arg_pair);
if (ret)
return ret;
}
ret = seq_buf_printf(&cmd->seq, " %s%c%s%c", arg_pair->lhs,
arg_pair->operator, arg_pair->rhs,
arg_pair->separator);
if (ret) {
pr_err("field string is too long: %s%c%s%c\n", arg_pair->lhs,
arg_pair->operator, arg_pair->rhs,
arg_pair->separator);
return -E2BIG;
}
return ret;
}
/**
* dynevent_str_add - Add a string to a dynevent_cmd
* @cmd: A pointer to the dynevent_cmd struct representing the new event cmd
* @str: The string to append to the current cmd
*
* Append a string to a dynevent_cmd. The string will be appended to
* the current cmd string as-is, with nothing prepended or appended.
*
* Return: 0 if successful, error otherwise.
*/
int dynevent_str_add(struct dynevent_cmd *cmd, const char *str)
{
int ret = 0;
ret = seq_buf_puts(&cmd->seq, str);
if (ret) {
pr_err("String is too long: %s\n", str);
return -E2BIG;
}
return ret;
}
/**
* dynevent_cmd_init - Initialize a dynevent_cmd object
* @cmd: A pointer to the dynevent_cmd struct representing the cmd
* @buf: A pointer to the buffer to generate the command into
* @maxlen: The length of the buffer the command will be generated into
* @type: The type of the cmd, checked against further operations
* @run_command: The type-specific function that will actually run the command
*
* Initialize a dynevent_cmd. A dynevent_cmd is used to build up and
* run dynamic event creation commands, such as commands for creating
* synthetic and kprobe events. Before calling any of the functions
* used to build the command, a dynevent_cmd object should be
* instantiated and initialized using this function.
*
* The initialization sets things up by saving a pointer to the
* user-supplied buffer and its length via the @buf and @maxlen
* params, and by saving the cmd-specific @type and @run_command
* params which are used to check subsequent dynevent_cmd operations
* and actually run the command when complete.
*/
void dynevent_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen,
enum dynevent_type type,
dynevent_create_fn_t run_command)
{
memset(cmd, '\0', sizeof(*cmd));
seq_buf_init(&cmd->seq, buf, maxlen);
cmd->type = type;
cmd->run_command = run_command;
}
/**
* dynevent_arg_init - Initialize a dynevent_arg object
* @arg: A pointer to the dynevent_arg struct representing the arg
* @separator: An (optional) separator, appended after adding the arg
*
* Initialize a dynevent_arg object. A dynevent_arg represents an
* object used to append single arguments to the current command
* string. After the arg string is successfully appended to the
* command string, the optional @separator is appended. If no
* separator was specified when initializing the arg, a space will be
* appended.
*/
void dynevent_arg_init(struct dynevent_arg *arg,
char separator)
{
memset(arg, '\0', sizeof(*arg));
if (!separator)
separator = ' ';
arg->separator = separator;
}
/**
* dynevent_arg_pair_init - Initialize a dynevent_arg_pair object
* @arg_pair: A pointer to the dynevent_arg_pair struct representing the arg
* @operator: An (optional) operator, appended after adding the first arg
* @separator: An (optional) separator, appended after adding the second arg
*
* Initialize a dynevent_arg_pair object. A dynevent_arg_pair
* represents an object used to append argument pairs such as 'type
* variable_name;' or 'x+y' to the current command string. An
* argument pair consists of a left-hand-side argument and a
* right-hand-side argument separated by an operator, which can be
* whitespace, all followed by a separator, if applicable. After the
* first arg string is successfully appended to the command string,
* the optional @operator is appended, followed by the second arg and
* optional @separator. If no separator was specified when
* initializing the arg, a space will be appended.
*/
void dynevent_arg_pair_init(struct dynevent_arg_pair *arg_pair,
char operator, char separator)
{
memset(arg_pair, '\0', sizeof(*arg_pair));
if (!operator)
operator = ' ';
arg_pair->operator = operator;
if (!separator)
separator = ' ';
arg_pair->separator = separator;
}
/**
* dynevent_create - Create the dynamic event contained in dynevent_cmd
* @cmd: The dynevent_cmd object containing the dynamic event creation command
*
* Once a dynevent_cmd object has been successfully built up via the
* dynevent_cmd_init(), dynevent_arg_add() and dynevent_arg_pair_add()
* functions, this function runs the final command to actually create
* the event.
*
* Return: 0 if the event was successfully created, error otherwise.
*/
int dynevent_create(struct dynevent_cmd *cmd)
{
return cmd->run_command(cmd);
}
EXPORT_SYMBOL_GPL(dynevent_create);