diff --git a/Documentation/trace/ftrace-design.rst b/Documentation/trace/ftrace-design.rst
index 6893399157f0..dc82d64b3a44 100644
--- a/Documentation/trace/ftrace-design.rst
+++ b/Documentation/trace/ftrace-design.rst
@@ -217,18 +217,6 @@ along to ftrace_push_return_trace() instead of a stub value of 0.
 
 Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
 
-HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
---------------------------------
-
-An arch may pass in a pointer to the return address on the stack. This
-prevents potential stack unwinding issues where the unwinder gets out of
-sync with ret_stack and the wrong addresses are reported by
-ftrace_graph_ret_addr().
-
-Adding support for it is easy: just define the macro in asm/ftrace.h and
-pass the return address pointer as the 'retp' argument to
-ftrace_push_return_trace().
-
 HAVE_SYSCALL_TRACEPOINTS
 ------------------------
 
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index ab158196480c..dc9cf0bd2a4c 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -12,17 +12,6 @@
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 
-/*
- * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
- * "return address pointer" which can be used to uniquely identify a return
- * address which has been overwritten.
- *
- * On arm64 we use the address of the caller's frame record, which remains the
- * same for the lifetime of the instrumented function, unlike the return
- * address in the LR.
- */
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #else
diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h
index fd215c38ef27..00f9f7647e3f 100644
--- a/arch/csky/include/asm/ftrace.h
+++ b/arch/csky/include/asm/ftrace.h
@@ -7,8 +7,6 @@
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 
 #define MCOUNT_ADDR	((unsigned long)_mcount)
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index de891c2c83d4..c0a682808e07 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -28,7 +28,6 @@ struct dyn_ftrace;
 struct dyn_arch_ftrace { };
 
 #define ARCH_SUPPORTS_FTRACE_OPS 1
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
 #define ftrace_init_nop ftrace_init_nop
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 107fc5a48456..559560286e6d 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -8,8 +8,6 @@
 #define MCOUNT_ADDR		((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
 
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
 /* Ignore unused weak functions which will have larger offsets */
 #if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
 #define FTRACE_MCOUNT_MAX_OFFSET	16
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 9eb31a7ea0aa..2cddd79ff21b 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -11,7 +11,6 @@
 #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER)
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 
 #ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 77e479d44f1e..fbadca645af7 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -2,7 +2,6 @@
 #ifndef _ASM_S390_FTRACE_H
 #define _ASM_S390_FTRACE_H
 
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #define MCOUNT_INSN_SIZE	6
 
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 897cf02c20b1..0152a81d9b4a 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -20,8 +20,6 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
 
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
 #ifndef __ASSEMBLY__
 extern void __fentry__(void);
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b792274189a3..51575b76818e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -227,6 +227,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  *            ftrace_enabled.
  * DIRECT - Used by the direct ftrace_ops helper for direct functions
  *            (internal ftrace only, should not be used by others)
+ * SUBOP  - Is controlled by another op, via its managed field.
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= BIT(0),
@@ -247,6 +248,7 @@ enum {
 	FTRACE_OPS_FL_TRACE_ARRAY		= BIT(15),
 	FTRACE_OPS_FL_PERMANENT			= BIT(16),
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
+	FTRACE_OPS_FL_SUBOP			= BIT(18),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -334,7 +336,9 @@ struct ftrace_ops {
 	unsigned long			trampoline;
 	unsigned long			trampoline_size;
 	struct list_head		list;
+	struct list_head		subop_list;
 	ftrace_ops_func_t		ops_func;
+	struct ftrace_ops		*managed;
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	unsigned long			direct_call;
 #endif
@@ -509,6 +513,15 @@ static inline void stack_tracer_disable(void) { }
 static inline void stack_tracer_enable(void) { }
 #endif
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+	FTRACE_MAY_SLEEP		= (1 << 5),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 void ftrace_arch_code_modify_prepare(void);
@@ -603,15 +616,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 void ftrace_free_filter(struct ftrace_ops *ops);
 void ftrace_ops_set_global_filter(struct ftrace_ops *ops);
 
-enum {
-	FTRACE_UPDATE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-	FTRACE_MAY_SLEEP		= (1 << 5),
-};
-
 /*
  * The FTRACE_UPDATE_* enum is used to pass information back
  * from the ftrace_update_record() and ftrace_test_record()
@@ -1027,19 +1031,31 @@ struct ftrace_graph_ret {
 	unsigned long long rettime;
 } __packed;
 
-/* Type of the callback handlers for tracing function graph*/
-typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
-typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+struct fgraph_ops;
 
-extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
+/* Type of the callback handlers for tracing function graph */
+typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
+				       struct fgraph_ops *); /* return */
+typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
+				      struct fgraph_ops *); /* entry */
+
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
+bool ftrace_pids_enabled(struct ftrace_ops *ops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
+	struct ftrace_ops		ops; /* for the hash lists */
+	void				*private;
+	trace_func_graph_ent_t		saved_func;
+	int				idx;
 };
 
+void *fgraph_reserve_data(int idx, int size_bytes);
+void *fgraph_retrieve_data(int idx, int *size_bytes);
+
 /*
  * Stack of return addresses for functions
  * of a thread.
@@ -1055,9 +1071,7 @@ struct ftrace_ret_stack {
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	unsigned long fp;
 #endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 	unsigned long *retp;
-#endif
 };
 
 /*
@@ -1072,10 +1086,11 @@ function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
 
 struct ftrace_ret_stack *
-ftrace_graph_get_ret_stack(struct task_struct *task, int idx);
+ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
 
 /*
  * Sometimes we don't want to trace a function with the function
@@ -1114,6 +1129,9 @@ extern void ftrace_graph_init_task(struct task_struct *t);
 extern void ftrace_graph_exit_task(struct task_struct *t);
 extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
 
+/* Used by assembly, but to quiet sparse warnings */
+extern struct ftrace_ops *function_trace_op;
+
 static inline void pause_graph_tracing(void)
 {
 	atomic_inc(&current->tracing_graph_pause);
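For orientation only (this example is not part of the patch): both callbacks now receive the fgraph_ops pointer, so one handler implementation can serve several registered instances, with per-instance state hanging off the ops. The names my_entry(), my_return() and my_ops below are hypothetical:

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	/* gops->private may carry per-instance state */
	return 1;	/* non-zero: request the matching retfunc call */
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
}

static struct fgraph_ops my_ops = {
	.entryfunc	= my_entry,
	.retfunc	= my_return,
};

With the changes below, register_ftrace_graph(&my_ops) no longer has to be the only registered user.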
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33dd8d9d2b85..e330ee0205c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1413,7 +1413,7 @@ struct task_struct {
 	int				curr_ret_depth;
 
 	/* Stack of return addresses for return function tracing: */
-	struct ftrace_ret_stack	*ret_stack;
+	unsigned long			*ret_stack;
 
 	/* Timestamp for last schedule: */
 	unsigned long long		ftrace_timestamp;
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 24ea8ac049b4..ae04054a1be3 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,35 +44,6 @@ enum {
 	 */
 	TRACE_IRQ_BIT,
 
-	/* Set if the function is in the set_graph_function file */
-	TRACE_GRAPH_BIT,
-
-	/*
-	 * In the very unlikely case that an interrupt came in
-	 * at a start of graph tracing, and we want to trace
-	 * the function in that interrupt, the depth can be greater
-	 * than zero, because of the preempted start of a previous
-	 * trace. In an even more unlikely case, depth could be 2
-	 * if a softirq interrupted the start of graph tracing,
-	 * followed by an interrupt preempting a start of graph
-	 * tracing in the softirq, and depth can even be 3
-	 * if an NMI came in at the start of an interrupt function
-	 * that preempted a softirq start of a function that
-	 * preempted normal context!!!! Luckily, it can't be
-	 * greater than 3, so the next two bits are a mask
-	 * of what the depth is when we set TRACE_GRAPH_BIT
-	 */
-
-	TRACE_GRAPH_DEPTH_START_BIT,
-	TRACE_GRAPH_DEPTH_END_BIT,
-
-	/*
-	 * To implement set_graph_notrace, if this bit is set, we ignore
-	 * function graph tracing of called functions, until the return
-	 * function is called to clear it.
-	 */
-	TRACE_GRAPH_NOTRACE_BIT,
-
 	/* Used to prevent recursion recording from recursing. */
 	TRACE_RECORD_RECURSION_BIT,
 };
@@ -81,16 +52,6 @@ enum {
 #define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
 #define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
 
-#define trace_recursion_depth() \
-	(((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
-#define trace_recursion_set_depth(depth) \
-	do {								\
-		current->trace_recursion &=				\
-			~(3 << TRACE_GRAPH_DEPTH_START_BIT);		\
-		current->trace_recursion |=				\
-			((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT;	\
-	} while (0)
-
 #define TRACE_CONTEXT_BITS	4
 
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index a130b2d898f7..fc205ad167a9 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -7,9 +7,11 @@
  *
  * Highly modified by Steven Rostedt (VMware).
  */
+#include <linux/bits.h>
 #include <linux/jump_label.h>
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
+#include <linux/static_call.h>
 #include <linux/slab.h>
 
 #include <trace/events/sched.h>
@@ -17,17 +19,447 @@
 #include "ftrace_internal.h"
 #include "trace.h"
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
-	.func_hash		= val, \
-	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
+/*
+ * FGRAPH_FRAME_SIZE:	Size in bytes of the meta data on the shadow stack
+ * FGRAPH_FRAME_OFFSET:	Size in long words of the meta data frame
+ */
+#define FGRAPH_FRAME_SIZE	sizeof(struct ftrace_ret_stack)
+#define FGRAPH_FRAME_OFFSET	DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long))
+
+/*
+ * On entry to a function (via function_graph_enter()), a new fgraph frame
+ * (ftrace_ret_stack) is pushed onto the stack as well as a word that
+ * holds a bitmask and a type (called "bitmap"). The bitmap is defined as:
+ *
+ * bits:  0 -  9	offset in words from the previous ftrace_ret_stack
+ *
+ * bits: 10 - 11	Type of storage
+ *			  0 - reserved
+ *			  1 - bitmap of fgraph_array index
+ *			  2 - reserved data
+ *
+ * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP):
+ *  bits: 12 - 27	The bitmap of fgraph_ops fgraph_array index
+ *			That is, it's a bitmask of 0-15 (16 bits)
+ *			where if a corresponding ops in the fgraph_array[]
+ *			expects a callback from the return of the function
+ *			its corresponding bit will be set.
+ *
+ *
+ * The top of the ret_stack (when not empty) will always have a reference
+ * word that points to the last fgraph frame that was saved.
+ *
+ * For reserved data:
+ *  bits: 12 - 17	The size in words that is stored
+ *  bits: 18 - 23	The index of fgraph_array, which shows who is stored
+ *
+ * That is, at the end of function_graph_enter, if the first and fourth
+ * fgraph_ops on the fgraph_array[] (index 0 and 3) need their retfunc called
+ * on the return of the function being traced, and the fourth fgraph_ops
+ * stored two words of data, this is what will be on the task's shadow
+ * ret_stack: (the stack grows upward)
+ *
+ *  ret_stack[SHADOW_STACK_OFFSET]
+ * | SHADOW_STACK_TASK_VARS(ret_stack)[15] |
+ * ...
+ * | SHADOW_STACK_TASK_VARS(ret_stack)[0]  |
+ *  ret_stack[SHADOW_STACK_MAX_OFFSET]
+ * ...
+ * |                                            | <- task->curr_ret_stack
+ * +--------------------------------------------+
+ * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET|
+ * | *or put another way*                       |
+ * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \          | This is for fgraph_ops[3].
+ * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \          | The data size is 2 words.
+ * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ |
+ * | (offset2:FGRAPH_FRAME_OFFSET+3)            | <- the offset2 is from here
+ * +--------------------------------------------+ ( It is 4 words from the ret_stack)
+ * |            STORED DATA WORD 2              |
+ * |            STORED DATA WORD 1              |
+ * +--------------------------------------------+
+ * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET|
+ * | *or put another way*                       |
+ * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT  | \ |
+ * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ |
+ * | (offset1:FGRAPH_FRAME_OFFSET)              | <- the offset1 is from here
+ * +--------------------------------------------+
+ * | struct ftrace_ret_stack                    |
+ * |   (stores the saved ret pointer)           | <- the offset points here
+ * +--------------------------------------------+
+ * |     (X) | (N)                              | ( N words away from
+ * |                                            |   previous ret_stack)
+ * ...
+ * ret_stack[0]
+ *
+ * If a backtrace is required, and the real return pointer needs to be
+ * fetched, then it looks at the task's curr_ret_stack offset; if it
+ * is greater than zero (reserved, or right before popped), it would mask
+ * the value by FGRAPH_FRAME_OFFSET_MASK to get the offset of the
+ * ftrace_ret_stack structure stored on the shadow stack.
+ */
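As a worked example of the encoding just described (illustrative only; it assumes FGRAPH_FRAME_OFFSET == 4, which holds on a 64-bit build without HAVE_FUNCTION_GRAPH_FP_TEST, matching the diagram):

	unsigned long val = (9 << 12) | (1 << 10) | 4;	/* the bitmap word above */

	val & FGRAPH_FRAME_OFFSET_MASK;			/* == 4: frame is 4 words down */
	(val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK;	/* == 1: FGRAPH_TYPE_BITMAP    */
	(val >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
			/* == 9 == BIT(0)|BIT(3): call the retfunc of
			   fgraph_array[0] and fgraph_array[3] on return */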
+/*
+ * The following is for the top word on the stack:
+ *
+ *   FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame
+ *   FGRAPH_TYPE (10-11) holds the type of word this is.
+ *     (RESERVED or BITMAP)
+ */
+#define FGRAPH_FRAME_OFFSET_BITS	10
+#define FGRAPH_FRAME_OFFSET_MASK	GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0)
+
+#define FGRAPH_TYPE_BITS	2
+#define FGRAPH_TYPE_MASK	GENMASK(FGRAPH_TYPE_BITS - 1, 0)
+#define FGRAPH_TYPE_SHIFT	FGRAPH_FRAME_OFFSET_BITS
+
+enum {
+	FGRAPH_TYPE_RESERVED	= 0,
+	FGRAPH_TYPE_BITMAP	= 1,
+	FGRAPH_TYPE_DATA	= 2,
+};
+
+/*
+ * For BITMAP type:
+ *   FGRAPH_INDEX (12-27) bits holding the gops index wanting return callback called
+ */
+#define FGRAPH_INDEX_BITS	16
+#define FGRAPH_INDEX_MASK	GENMASK(FGRAPH_INDEX_BITS - 1, 0)
+#define FGRAPH_INDEX_SHIFT	(FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)
+
+/*
+ * For DATA type:
+ *   FGRAPH_DATA (12-17) bits hold the size of data (in words)
+ *   FGRAPH_INDEX (18-23) bits hold the index for which gops->idx the data is for
+ *
+ * Note:
+ *   data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words.
+ */
+#define FGRAPH_DATA_BITS	5
+#define FGRAPH_DATA_MASK	GENMASK(FGRAPH_DATA_BITS - 1, 0)
+#define FGRAPH_DATA_SHIFT	(FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)
+#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_BITS))
+
+#define FGRAPH_DATA_INDEX_BITS	4
+#define FGRAPH_DATA_INDEX_MASK	GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0)
+#define FGRAPH_DATA_INDEX_SHIFT	(FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS)
+
+#define FGRAPH_MAX_INDEX	\
+	((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX)
+
+#define FGRAPH_ARRAY_SIZE	FGRAPH_INDEX_BITS
+
+/*
+ * SHADOW_STACK_SIZE:	The size in bytes of the entire shadow stack
+ * SHADOW_STACK_OFFSET:	The size in long words of the shadow stack
+ * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added
+ */
+#define SHADOW_STACK_SIZE	(PAGE_SIZE)
+#define SHADOW_STACK_OFFSET	(SHADOW_STACK_SIZE / sizeof(long))
+/* Leave on a buffer at the end */
+#define SHADOW_STACK_MAX_OFFSET				\
+	(SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE))
+
+/* RET_STACK(): Return the frame from a given @offset from task @t */
+#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset]))
+
+/*
+ * Each fgraph_ops has a reserved unsigned long at the end (top) of the
+ * ret_stack to store task specific state.
+ */
+#define SHADOW_STACK_TASK_VARS(ret_stack) \
+	((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE]))
 
 DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
+static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
+static unsigned long fgraph_array_bitmask;
+
+/* LRU index table for fgraph_array */
+static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
+static int fgraph_lru_next;
+static int fgraph_lru_last;
+
+/* Initialize fgraph_lru_table with unused index */
+static void fgraph_lru_init(void)
+{
+	int i;
+
+	for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
+		fgraph_lru_table[i] = i;
+}
+
+/* Release the used index to the LRU table */
+static int fgraph_lru_release_index(int idx)
+{
+	if (idx < 0 || idx >= FGRAPH_ARRAY_SIZE ||
+	    WARN_ON_ONCE(fgraph_lru_table[fgraph_lru_last] != -1))
+		return -1;
+
+	fgraph_lru_table[fgraph_lru_last] = idx;
+	fgraph_lru_last = (fgraph_lru_last + 1) % FGRAPH_ARRAY_SIZE;
+
+	clear_bit(idx, &fgraph_array_bitmask);
+	return 0;
+}
+
+/* Allocate a new index from LRU table */
+static int fgraph_lru_alloc_index(void)
+{
+	int idx = fgraph_lru_table[fgraph_lru_next];
+
+	/* No id is available */
+	if (idx == -1)
+		return -1;
+
+	fgraph_lru_table[fgraph_lru_next] = -1;
+	fgraph_lru_next = (fgraph_lru_next + 1) % FGRAPH_ARRAY_SIZE;
+
+	set_bit(idx, &fgraph_array_bitmask);
+	return idx;
+}
+
+/* Get the offset to the fgraph frame from a ret_stack value */
+static inline int __get_offset(unsigned long val)
+{
+	return val & FGRAPH_FRAME_OFFSET_MASK;
+}
+
+/* Get the type of word from a ret_stack value */
+static inline int __get_type(unsigned long val)
+{
+	return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK;
+}
+
+/* Get the data_index for a DATA type ret_stack word */
+static inline int __get_data_index(unsigned long val)
+{
+	return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK;
+}
+
+/* Get the data_size for a DATA type ret_stack word */
+static inline int __get_data_size(unsigned long val)
+{
+	return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1;
+}
+
+/* Get the word from the ret_stack at @offset */
+static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset)
+{
+	return t->ret_stack[offset];
+}
+/* Get the FRAME_OFFSET from the word from the @offset on ret_stack */
+static inline int get_frame_offset(struct task_struct *t, int offset)
+{
+	return __get_offset(t->ret_stack[offset]);
+}
+
+/* For BITMAP type: get the bitmask from the @offset at ret_stack */
+static inline unsigned long
+get_bitmap_bits(struct task_struct *t, int offset)
+{
+	return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
+}
+
+/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */
+static inline void
+set_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
+{
+	t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) |
+		(FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
+}
+
+/* For DATA type: get the data saved under the ret_stack word at @offset */
+static inline void *get_data_type_data(struct task_struct *t, int offset)
+{
+	unsigned long val = t->ret_stack[offset];
+
+	if (__get_type(val) != FGRAPH_TYPE_DATA)
+		return NULL;
+	offset -= __get_data_size(val);
+	return (void *)&t->ret_stack[offset];
+}
+
+/* Create the ret_stack word for a DATA type */
+static inline unsigned long make_data_type_val(int idx, int size, int offset)
+{
+	return (idx << FGRAPH_DATA_INDEX_SHIFT) |
+		((size - 1) << FGRAPH_DATA_SHIFT) |
+		(FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset;
+}
+
+/* ftrace_graph_entry set to this to tell some archs to run function graph */
+static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
+{
+	return 0;
+}
+
+/* ftrace_graph_return set to this to tell some archs to run function graph */
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+{
+}
+
+static void ret_stack_set_task_var(struct task_struct *t, int idx, long val)
+{
+	unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+	gvals[idx] = val;
+}
+
+static unsigned long *
+ret_stack_get_task_var(struct task_struct *t, int idx)
+{
+	unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+	return &gvals[idx];
+}
+
+static void ret_stack_init_task_vars(unsigned long *ret_stack)
+{
+	unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack);
+
+	memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE);
+}
+
+/**
+ * fgraph_reserve_data - Reserve storage on the task's ret_stack
+ * @idx:	The index of fgraph_array
+ * @size_bytes: The size in bytes to reserve
+ *
+ * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the
+ * task's ret_stack shadow stack, for a given fgraph_ops during
+ * the entryfunc() call. If entryfunc() returns zero, the storage
+ * is discarded. An entryfunc() can only call this once per iteration.
+ * The fgraph_ops retfunc() can retrieve this stored data with
+ * fgraph_retrieve_data().
+ *
+ * Returns: On success, a pointer to the data on the stack.
+ *   Otherwise, NULL if there's not enough space left on the
+ *   ret_stack for the data, or if fgraph_reserve_data() was called
+ *   more than once for a single entryfunc() call.
+ */
+void *fgraph_reserve_data(int idx, int size_bytes)
+{
+	unsigned long val;
+	void *data;
+	int curr_ret_stack = current->curr_ret_stack;
+	int data_size;
+
+	if (size_bytes > FGRAPH_MAX_DATA_SIZE)
+		return NULL;
+
+	/* Convert the data size to number of longs. */
+	data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3);
+
+	val = get_fgraph_entry(current, curr_ret_stack - 1);
+	data = &current->ret_stack[curr_ret_stack];
+
+	curr_ret_stack += data_size + 1;
+	if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET))
+		return NULL;
+
+	val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1);
+
+	/* Set the last word to be reserved */
+	current->ret_stack[curr_ret_stack - 1] = val;
+
+	/* Make sure interrupts see this */
+	barrier();
+	current->curr_ret_stack = curr_ret_stack;
+	/* Again sync with interrupts, and reset reserve */
+	current->ret_stack[curr_ret_stack - 1] = val;
+
+	return data;
+}
+
+/**
+ * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data()
+ * @idx:	the index of fgraph_array (fgraph_ops::idx)
+ * @size_bytes: pointer to retrieved data size.
+ *
+ * This is to be called by a fgraph_ops retfunc(), to retrieve data that
+ * was stored by the fgraph_ops entryfunc() on the function entry.
+ * That is, this will retrieve the data that was reserved on the
+ * entry of the function that corresponds to the exit of the function
+ * that the fgraph_ops retfunc() is called on.
+ *
+ * Returns: The stored data from fgraph_reserve_data() called by the
+ *   matching entryfunc() for the retfunc() this is called from.
+ *   Or NULL if there was nothing stored.
+ */
+void *fgraph_retrieve_data(int idx, int *size_bytes)
+{
+	int offset = current->curr_ret_stack - 1;
+	unsigned long val;
+
+	val = get_fgraph_entry(current, offset);
+	while (__get_type(val) == FGRAPH_TYPE_DATA) {
+		if (__get_data_index(val) == idx)
+			goto found;
+		offset -= __get_data_size(val) + 1;
+		val = get_fgraph_entry(current, offset);
+	}
+	return NULL;
+found:
+	if (size_bytes)
+		*size_bytes = __get_data_size(val) * sizeof(long);
+	return get_data_type_data(current, offset);
+}
+
+/**
+ * fgraph_get_task_var - retrieve a task specific state variable
+ * @gops: The ftrace_ops that owns the task specific variable
+ *
+ * Every registered fgraph_ops has a task state variable
+ * reserved on the task's ret_stack. This function returns the
+ * address to that variable.
+ *
+ * Returns the address to the fgraph_ops @gops task-specific
+ * unsigned long variable.
+ */
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops)
+{
+	return ret_stack_get_task_var(current, gops->idx);
+}
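As a usage sketch (illustrative only; my_profile_entry()/my_profile_return() are hypothetical), an entryfunc() can reserve per-call data that the matching retfunc() later retrieves:

static int my_profile_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	u64 *ts;

	ts = fgraph_reserve_data(gops->idx, sizeof(*ts));
	if (!ts)
		return 0;	/* no room: skip the return callback too */
	*ts = trace_clock_local();
	return 1;
}

static void my_profile_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
	int size;
	u64 *ts = fgraph_retrieve_data(gops->idx, &size);

	if (ts)
		pr_info("%ps took %llu ns\n", (void *)trace->func,
			trace_clock_local() - *ts);
}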
+
+/*
+ * @offset: The offset into @t->ret_stack to find the ret_stack entry
+ * @frame_offset: Where to place the offset into @t->ret_stack of that entry
+ *
+ * Returns a pointer to the previous ret_stack below @offset or NULL
+ *   when it reaches the bottom of the stack.
+ *
+ * Calling this with:
+ *
+ *   offset = task->curr_ret_stack;
+ *   do {
+ *	ret_stack = get_ret_stack(task, offset, &offset);
+ *   } while (ret_stack);
+ *
+ * Will iterate through all the ret_stack entries from curr_ret_stack
+ * down to the first one.
+ */
+static inline struct ftrace_ret_stack *
+get_ret_stack(struct task_struct *t, int offset, int *frame_offset)
+{
+	int offs;
+
+	BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long));
+
+	if (unlikely(offset <= 0))
+		return NULL;
+
+	offs = get_frame_offset(t, --offset);
+	if (WARN_ON_ONCE(offs <= 0 || offs > offset))
+		return NULL;
+
+	offset -= offs;
+
+	*frame_offset = offset;
+	return RET_STACK(t, offset);
+}
+
 /* Both enabled by default (can be cleared by function_graph tracer flags */
 static bool fgraph_sleep_time = true;
 
@@ -51,6 +483,27 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
 }
 #endif
 
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
+			    struct fgraph_ops *gops)
+{
+	return 0;
+}
+
+static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
+				  struct fgraph_ops *gops)
+{
+}
+
+static struct fgraph_ops fgraph_stub = {
+	.entryfunc = ftrace_graph_entry_stub,
+	.retfunc = ftrace_graph_ret_stub,
+};
+
+static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub;
+DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub);
+DEFINE_STATIC_CALL(fgraph_retfunc, ftrace_graph_ret_stub);
+static DEFINE_STATIC_KEY_TRUE(fgraph_do_direct);
+
 /**
  * ftrace_graph_stop - set to permanently disable function graph tracing
  *
@@ -67,10 +520,13 @@ void ftrace_graph_stop(void)
 
 /* Add a function return address to the trace stack on thread info.*/
 static int
 ftrace_push_return_trace(unsigned long ret, unsigned long func,
-			 unsigned long frame_pointer, unsigned long *retp)
+			 unsigned long frame_pointer, unsigned long *retp,
+			 int fgraph_idx)
 {
+	struct ftrace_ret_stack *ret_stack;
 	unsigned long long calltime;
-	int index;
+	unsigned long val;
+	int offset;
 
 	if (unlikely(ftrace_graph_is_dead()))
 		return -EBUSY;
@@ -78,32 +534,67 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
 	if (!current->ret_stack)
 		return -EBUSY;
 
+	BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long));
+
+	/* Set val to "reserved" with the delta to the new fgraph frame */
+	val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
+
 	/*
 	 * We must make sure the ret_stack is tested before we read
 	 * anything else.
 	 */
 	smp_rmb();
 
-	/* The return trace stack is full */
-	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+	/*
+	 * Check if there's room on the shadow stack to fit an fgraph frame
+	 * and a bitmap word.
+	 */
+	if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) {
 		atomic_inc(&current->trace_overrun);
 		return -EBUSY;
 	}
 
 	calltime = trace_clock_local();
 
-	index = ++current->curr_ret_stack;
+	offset = READ_ONCE(current->curr_ret_stack);
+	ret_stack = RET_STACK(current, offset);
+	offset += FGRAPH_FRAME_OFFSET;
+
+	/* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */
+	current->ret_stack[offset] = val;
+	ret_stack->ret = ret;
+	/*
+	 * The unwinders expect curr_ret_stack to point to either zero
+	 * or an offset where to find the next ret_stack. Even though the
+	 * ret stack might be bogus, we want to write the ret and the
+	 * offset to find the ret_stack before we increment the stack pointer.
+	 * If an interrupt comes in now before we increment the curr_ret_stack
+	 * it may blow away what we wrote. But that's fine, because the
+	 * offset will still be correct (even though the 'ret' won't be).
+	 * What we worry about is the offset being correct after we increment
+	 * the curr_ret_stack and before we update that offset, as if an
+	 * interrupt comes in and does an unwind stack dump, it will need
+	 * at least a correct offset!
+	 */
 	barrier();
-	current->ret_stack[index].ret = ret;
-	current->ret_stack[index].func = func;
-	current->ret_stack[index].calltime = calltime;
+	WRITE_ONCE(current->curr_ret_stack, offset + 1);
+	/*
+	 * This next barrier is to ensure that an interrupt coming in
+	 * will not corrupt what we are about to write.
+	 */
+	barrier();
+
+	/* Still keep it reserved even if an interrupt came in */
+	current->ret_stack[offset] = val;
+
+	ret_stack->ret = ret;
+	ret_stack->func = func;
+	ret_stack->calltime = calltime;
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	current->ret_stack[index].fp = frame_pointer;
+	ret_stack->fp = frame_pointer;
 #endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-	current->ret_stack[index].retp = retp;
-#endif
-	return 0;
+	ret_stack->retp = retp;
+	return offset;
 }
 
 /*
@@ -120,44 +611,85 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
 # define MCOUNT_INSN_SIZE 0
 #endif
 
+/* If the caller does not use ftrace, call this function. */
 int function_graph_enter(unsigned long ret, unsigned long func,
 			 unsigned long frame_pointer, unsigned long *retp)
 {
 	struct ftrace_graph_ent trace;
+	unsigned long bitmap = 0;
+	int offset;
+	int i;
 
 	trace.func = func;
 	trace.depth = ++current->curr_ret_depth;
 
-	if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
+	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
+	if (offset < 0)
 		goto out;
 
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
+#ifdef CONFIG_HAVE_STATIC_CALL
+	if (static_branch_likely(&fgraph_do_direct)) {
+		int save_curr_ret_stack = current->curr_ret_stack;
+
+		if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
+			bitmap |= BIT(fgraph_direct_gops->idx);
+		else
+			/* Clear out any saved storage */
+			current->curr_ret_stack = save_curr_ret_stack;
+	} else
+#endif
+	{
+		for_each_set_bit(i, &fgraph_array_bitmask,
+				 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+			struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
+			int save_curr_ret_stack;
+
+			if (gops == &fgraph_stub)
+				continue;
+
+			save_curr_ret_stack = current->curr_ret_stack;
+			if (ftrace_ops_test(&gops->ops, func, NULL) &&
+			    gops->entryfunc(&trace, gops))
+				bitmap |= BIT(i);
+			else
+				/* Clear out any saved storage */
+				current->curr_ret_stack = save_curr_ret_stack;
+		}
+	}
+
+	if (!bitmap)
 		goto out_ret;
 
+	/*
+	 * Since this function uses fgraph_idx = 0 as a tail-call checking
+	 * flag, set that bit always.
+	 */
+	set_bitmap(current, offset, bitmap | BIT(0));
+
 	return 0;
  out_ret:
-	current->curr_ret_stack--;
+	current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
  out:
 	current->curr_ret_depth--;
 	return -EBUSY;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-static void
+static struct ftrace_ret_stack *
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
-			unsigned long frame_pointer)
+			unsigned long frame_pointer, int *offset)
 {
-	int index;
+	struct ftrace_ret_stack *ret_stack;
 
-	index = current->curr_ret_stack;
+	ret_stack = get_ret_stack(current, current->curr_ret_stack, offset);
 
-	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+	if (unlikely(!ret_stack)) {
 		ftrace_graph_stop();
-		WARN_ON(1);
+		WARN(1, "Bad function graph ret_stack pointer: %d",
+		     current->curr_ret_stack);
 		/* Might as well panic, otherwise we have no where to go */
 		*ret = (unsigned long)panic;
-		return;
+		return NULL;
 	}
 
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
@@ -175,30 +707,33 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 	 * Note, -mfentry does not use frame pointers, and this test
 	 *  is not needed if CC_USING_FENTRY is set.
 	 */
-	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+	if (unlikely(ret_stack->fp != frame_pointer)) {
 		ftrace_graph_stop();
 		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
 		     "  from func %ps return to %lx\n",
-		     current->ret_stack[index].fp,
+		     ret_stack->fp,
 		     frame_pointer,
-		     (void *)current->ret_stack[index].func,
-		     current->ret_stack[index].ret);
+		     (void *)ret_stack->func,
+		     ret_stack->ret);
 		*ret = (unsigned long)panic;
-		return;
+		return NULL;
 	}
 #endif
 
-	*ret = current->ret_stack[index].ret;
-	trace->func = current->ret_stack[index].func;
-	trace->calltime = current->ret_stack[index].calltime;
+	*offset += FGRAPH_FRAME_OFFSET;
+	*ret = ret_stack->ret;
+	trace->func = ret_stack->func;
+	trace->calltime = ret_stack->calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = current->curr_ret_depth--;
+	trace->depth = current->curr_ret_depth;
 	/*
 	 * We still want to trace interrupts coming in if
 	 * max_depth is set to 1. Make sure the decrement is
 	 * seen before ftrace_graph_return.
 	 */
 	barrier();
+
+	return ret_stack;
 }
 
 /*
@@ -236,30 +771,55 @@ struct fgraph_ret_regs;
 static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
 						unsigned long frame_pointer)
 {
+	struct ftrace_ret_stack *ret_stack;
 	struct ftrace_graph_ret trace;
+	unsigned long bitmap;
 	unsigned long ret;
+	int offset;
+	int i;
 
-	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+	ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset);
+
+	if (unlikely(!ret_stack)) {
+		ftrace_graph_stop();
+		WARN_ON(1);
+		/* Might as well panic. What else to do? */
+		return (unsigned long)panic;
+	}
+
+	trace.rettime = trace_clock_local();
 #ifdef CONFIG_FUNCTION_GRAPH_RETVAL
 	trace.retval = fgraph_ret_regs_return_value(ret_regs);
 #endif
-	trace.rettime = trace_clock_local();
-	ftrace_graph_return(&trace);
+
+	bitmap = get_bitmap_bits(current, offset);
+
+#ifdef CONFIG_HAVE_STATIC_CALL
+	if (static_branch_likely(&fgraph_do_direct)) {
+		if (test_bit(fgraph_direct_gops->idx, &bitmap))
+			static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+	} else
+#endif
+	{
+		for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
+			struct fgraph_ops *gops = fgraph_array[i];
+
+			if (gops == &fgraph_stub)
+				continue;
+
+			gops->retfunc(&trace, gops);
+		}
+	}
+
 	/*
 	 * The ftrace_graph_return() may still access the current
 	 * ret_stack structure, we need to make sure the update of
 	 * curr_ret_stack is after that.
 	 */
 	barrier();
-	current->curr_ret_stack--;
-
-	if (unlikely(!ret)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic. What else to do? */
-		ret = (unsigned long)panic;
-	}
+	current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET;
+	current->curr_ret_depth--;
 
 	return ret;
 }
@@ -282,7 +842,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
 /**
  * ftrace_graph_get_ret_stack - return the entry of the shadow stack
- * @task: The task to read the shadow stack from
+ * @task: The task to read the shadow stack from.
  * @idx: Index down the shadow stack
  *
  * Return the ret_struct on the shadow stack of the @task at the
@@ -294,104 +854,116 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 struct ftrace_ret_stack *
 ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
 {
-	idx = task->curr_ret_stack - idx;
+	struct ftrace_ret_stack *ret_stack = NULL;
+	int offset = task->curr_ret_stack;
 
-	if (idx >= 0 && idx <= task->curr_ret_stack)
-		return &task->ret_stack[idx];
+	if (offset < 0)
+		return NULL;
 
-	return NULL;
+	do {
+		ret_stack = get_ret_stack(task, offset, &offset);
+	} while (ret_stack && --idx >= 0);
+
+	return ret_stack;
 }
 
 /**
- * ftrace_graph_ret_addr - convert a potentially modified stack return address
- *			   to its original value
+ * ftrace_graph_ret_addr - return the original value of the return address
+ * @task: The task the unwinder is being executed on
+ * @idx: An initialized pointer to the next stack index to use
+ * @ret: The current return address (likely pointing to return_handler)
+ * @retp: The address on the stack of the current return location
  *
  * This function can be called by stack unwinding code to convert a found stack
- * return address ('ret') to its original value, in case the function graph
+ * return address (@ret) to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'. If the address hasn't
- * been modified, the unchanged value of 'ret' is returned.
+ * been modified, the unchanged value of @ret is returned.
 *
- * 'idx' is a state variable which should be initialized by the caller to zero
- * before the first call.
+ * @idx holds the last index used to know where to start from. It should be
+ * initialized to zero for the first iteration as that will mean to start
+ * at the top of the shadow stack. If the location is found, this pointer
+ * will be assigned that location so that if called again, it will continue
+ * where it left off.
 *
- * 'retp' is a pointer to the return address on the stack. It's ignored if
- * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ * @retp is a pointer to the return address on the stack.
 */
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp)
 {
-	int index = task->curr_ret_stack;
-	int i;
+	struct ftrace_ret_stack *ret_stack;
+	unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
+	int i = task->curr_ret_stack;
 
-	if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+	if (ret != return_handler)
 		return ret;
 
-	if (index < 0)
+	if (!idx)
 		return ret;
 
-	for (i = 0; i <= index; i++)
-		if (task->ret_stack[i].retp == retp)
-			return task->ret_stack[i].ret;
+	i = *idx ? : task->curr_ret_stack;
+	while (i > 0) {
+		ret_stack = get_ret_stack(current, i, &i);
+		if (!ret_stack)
+			break;
+		/*
+		 * For the tail-call, there would be 2 or more ftrace_ret_stacks on
+		 * the ret_stack, which record "return_to_handler" as the return
+		 * address except for the last one.
+		 * But on the real stack, there should be 1 entry because tail-call
+		 * reuses the return address on the stack and jumps to the next function.
+		 * Thus we will continue to find the real return address.
+		 */
+		if (ret_stack->retp == retp &&
+		    ret_stack->ret != return_handler) {
+			*idx = i;
+			return ret_stack->ret;
+		}
+	}
 
 	return ret;
 }
-#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
-				    unsigned long ret, unsigned long *retp)
-{
-	int task_idx;
-
-	if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
-		return ret;
-
-	task_idx = task->curr_ret_stack;
-
-	if (!task->ret_stack || task_idx < *idx)
-		return ret;
-
-	task_idx -= *idx;
-	(*idx)++;
-
-	return task->ret_stack[task_idx].ret;
-}
-#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
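For reference, a sketch of how an unwinder is expected to consume this (illustrative only; graph_idx is the caller-maintained state that @idx points to, and addr/slot stand in for whatever the unwinder found on the real stack):

	int graph_idx = 0;
	unsigned long addr;
	unsigned long *slot;

	/* for each return address 'addr' found at stack slot 'slot': */
	addr = ftrace_graph_ret_addr(task, &graph_idx, addr, slot);

Each call either returns addr unchanged, or, when addr is the return_to_handler trampoline, the original return address recorded on the shadow stack.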
 
 static struct ftrace_ops graph_ops = {
 	.func			= ftrace_graph_func,
-	.flags			= FTRACE_OPS_FL_INITIALIZED |
-				   FTRACE_OPS_FL_PID |
-				   FTRACE_OPS_GRAPH_STUB,
+	.flags			= FTRACE_OPS_GRAPH_STUB,
 #ifdef FTRACE_GRAPH_TRAMP_ADDR
 	.trampoline		= FTRACE_GRAPH_TRAMP_ADDR,
 	/* trampoline_size is only needed for dynamically allocated tramps */
 #endif
-	ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
 };
 
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+		     struct ftrace_ops *src_ops)
+{
+	dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	if (src_ops) {
+		dst_ops->func_hash = &src_ops->local_hash;
+		mutex_init(&dst_ops->local_hash.regex_lock);
+		INIT_LIST_HEAD(&dst_ops->subop_list);
+		dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+	}
+#endif
+}
+
 void ftrace_graph_sleep_time_control(bool enable)
 {
 	fgraph_sleep_time = enable;
 }
 
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
-{
-	return 0;
-}
-
 /*
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
 */
-extern void ftrace_stub_graph(struct ftrace_graph_ret *);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
 
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
 trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
-static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
 
 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
-static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
 {
 	int i;
 	int ret = 0;
@@ -399,10 +971,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 	struct task_struct *g, *t;
 
 	for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
-		ret_stack_list[i] =
-			kmalloc_array(FTRACE_RETFUNC_DEPTH,
-				      sizeof(struct ftrace_ret_stack),
-				      GFP_KERNEL);
+		ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
 		if (!ret_stack_list[i]) {
 			start = 0;
 			end = i;
@@ -420,9 +989,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 
 		if (t->ret_stack == NULL) {
 			atomic_set(&t->trace_overrun, 0);
-			t->curr_ret_stack = -1;
+			ret_stack_init_task_vars(ret_stack_list[start]);
+			t->curr_ret_stack = 0;
 			t->curr_ret_depth = -1;
-			/* Make sure the tasks see the -1 first: */
+			/* Make sure the tasks see the 0 first: */
 			smp_wmb();
 			t->ret_stack = ret_stack_list[start++];
 		}
@@ -442,8 +1012,9 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
 				struct task_struct *next,
 				unsigned int prev_state)
 {
+	struct ftrace_ret_stack *ret_stack;
 	unsigned long long timestamp;
-	int index;
+	int offset;
 
 	/*
 	 * Does the user want to count the time a function was asleep.
@@ -466,57 +1037,23 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
 	 */
 	timestamp -= next->ftrace_timestamp;
 
-	for (index = next->curr_ret_stack; index >= 0; index--)
-		next->ret_stack[index].calltime += timestamp;
+	for (offset = next->curr_ret_stack; offset > 0; ) {
+		ret_stack = get_ret_stack(next, offset, &offset);
+		if (ret_stack)
+			ret_stack->calltime += timestamp;
+	}
 }
 
-static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
-{
-	if (!ftrace_ops_test(&global_ops, trace->func, NULL))
-		return 0;
-	return __ftrace_graph_entry(trace);
-}
-
-/*
- * The function graph tracer should only trace the functions defined
- * by set_ftrace_filter and set_ftrace_notrace. If another function
- * tracer ops is registered, the graph tracer requires testing the
- * function against the global ops, and not just trace any function
- * that any ftrace_ops registered.
- */
-void update_function_graph_func(void)
-{
-	struct ftrace_ops *op;
-	bool do_test = false;
-
-	/*
-	 * The graph and global ops share the same set of functions
-	 * to test. If any other ops is on the list, then
-	 * the graph tracing needs to test if its the function
-	 * it should call.
-	 */
-	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (op != &global_ops && op != &graph_ops &&
-		    op != &ftrace_list_end) {
-			do_test = true;
-			/* in double loop, break out with goto */
-			goto out;
-		}
-	} while_for_each_ftrace_op(op);
- out:
-	if (do_test)
-		ftrace_graph_entry = ftrace_graph_entry_test;
-	else
-		ftrace_graph_entry = __ftrace_graph_entry;
-}
-
-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+static DEFINE_PER_CPU(unsigned long *, idle_ret_stack);
 
 static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+graph_init_task(struct task_struct *t, unsigned long *ret_stack)
 {
 	atomic_set(&t->trace_overrun, 0);
+	ret_stack_init_task_vars(ret_stack);
 	t->ftrace_timestamp = 0;
+	t->curr_ret_stack = 0;
+	t->curr_ret_depth = -1;
 	/* make curr_ret_stack visible before we add the ret_stack */
 	smp_wmb();
 	t->ret_stack = ret_stack;
@@ -528,7 +1065,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
  */
 void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
 {
-	t->curr_ret_stack = -1;
+	t->curr_ret_stack = 0;
 	t->curr_ret_depth = -1;
 	/*
 	 * The idle task has no parent, it either has its own
@@ -538,14 +1075,11 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
 	WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
 
 	if (ftrace_graph_active) {
-		struct ftrace_ret_stack *ret_stack;
+		unsigned long *ret_stack;
 
 		ret_stack = per_cpu(idle_ret_stack, cpu);
 		if (!ret_stack) {
-			ret_stack =
-				kmalloc_array(FTRACE_RETFUNC_DEPTH,
-					      sizeof(struct ftrace_ret_stack),
-					      GFP_KERNEL);
+			ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
 			if (!ret_stack)
 				return;
 			per_cpu(idle_ret_stack, cpu) = ret_stack;
@@ -559,15 +1093,13 @@ void ftrace_graph_init_task(struct task_struct *t)
 {
 	/* Make sure we do not use the parent ret_stack */
 	t->ret_stack = NULL;
-	t->curr_ret_stack = -1;
+	t->curr_ret_stack = 0;
 	t->curr_ret_depth = -1;
 
 	if (ftrace_graph_active) {
-		struct ftrace_ret_stack *ret_stack;
+		unsigned long *ret_stack;
 
-		ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
-					  sizeof(struct ftrace_ret_stack),
-					  GFP_KERNEL);
+		ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
 		if (!ret_stack)
 			return;
 		graph_init_task(t, ret_stack);
@@ -576,7 +1108,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 
 void ftrace_graph_exit_task(struct task_struct *t)
 {
-	struct ftrace_ret_stack	*ret_stack = t->ret_stack;
+	unsigned long *ret_stack = t->ret_stack;
 
 	t->ret_stack = NULL;
 	/* NULL must become visible to IRQs before we free it: */
@@ -585,15 +1117,52 @@ void ftrace_graph_exit_task(struct task_struct *t)
 	kfree(ret_stack);
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+static int fgraph_pid_func(struct ftrace_graph_ent *trace,
+			   struct fgraph_ops *gops)
+{
+	struct trace_array *tr = gops->ops.private;
+	int pid;
+
+	if (tr) {
+		pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid);
+		if (pid == FTRACE_PID_IGNORE)
+			return 0;
+		if (pid != FTRACE_PID_TRACE &&
+		    pid != current->pid)
+			return 0;
+	}
+
+	return gops->saved_func(trace, gops);
+}
+
+void fgraph_update_pid_func(void)
+{
+	struct fgraph_ops *gops;
+	struct ftrace_ops *op;
+
+	if (!(graph_ops.flags & FTRACE_OPS_FL_INITIALIZED))
+		return;
+
+	list_for_each_entry(op, &graph_ops.subop_list, list) {
+		if (op->flags & FTRACE_OPS_FL_PID) {
+			gops = container_of(op, struct fgraph_ops, ops);
+			gops->entryfunc = ftrace_pids_enabled(op) ?
+				fgraph_pid_func : gops->saved_func;
+			if (ftrace_graph_active == 1)
+				static_call_update(fgraph_func, gops->entryfunc);
+		}
+	}
+}
+#endif
+
 /* Allocate a return stack for each task */
 static int start_graph_tracing(void)
 {
-	struct ftrace_ret_stack **ret_stack_list;
+	unsigned long **ret_stack_list;
 	int ret, cpu;
 
-	ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
-				       sizeof(struct ftrace_ret_stack *),
-				       GFP_KERNEL);
+	ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
 
 	if (!ret_stack_list)
 		return -ENOMEM;
@@ -619,40 +1188,111 @@ static int start_graph_tracing(void)
 	return ret;
 }
 
+static void init_task_vars(int idx)
+{
+	struct task_struct *g, *t;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (idle_task(cpu)->ret_stack)
+			ret_stack_set_task_var(idle_task(cpu), idx, 0);
+	}
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, t) {
+		if (t->ret_stack)
+			ret_stack_set_task_var(t, idx, 0);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+static void ftrace_graph_enable_direct(bool enable_branch)
+{
+	trace_func_graph_ent_t func = NULL;
+	trace_func_graph_ret_t retfunc = NULL;
+	int i;
+
+	for_each_set_bit(i, &fgraph_array_bitmask,
+			 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+		func = fgraph_array[i]->entryfunc;
+		retfunc = fgraph_array[i]->retfunc;
+		fgraph_direct_gops = fgraph_array[i];
+	}
+	if (WARN_ON_ONCE(!func))
+		return;
+
+	static_call_update(fgraph_func, func);
+	static_call_update(fgraph_retfunc, retfunc);
+	if (enable_branch)
+		static_branch_disable(&fgraph_do_direct);
+}
+
+static void ftrace_graph_disable_direct(bool disable_branch)
+{
+	if (disable_branch)
+		static_branch_disable(&fgraph_do_direct);
+	static_call_update(fgraph_func, ftrace_graph_entry_stub);
+	static_call_update(fgraph_retfunc, ftrace_graph_ret_stub);
+	fgraph_direct_gops = &fgraph_stub;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+	int command = 0;
 	int ret = 0;
+	int i = -1;
 
 	mutex_lock(&ftrace_lock);
 
-	/* we currently allow only one tracer registered at a time */
-	if (ftrace_graph_active) {
-		ret = -EBUSY;
+	if (!fgraph_array[0]) {
+		/* The array must always have real data on it */
+		for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
+			fgraph_array[i] = &fgraph_stub;
+		fgraph_lru_init();
+	}
+
+	i = fgraph_lru_alloc_index();
+	if (i < 0 || WARN_ON_ONCE(fgraph_array[i] != &fgraph_stub)) {
+		ret = -ENOSPC;
 		goto out;
 	}
 
-	register_pm_notifier(&ftrace_suspend_notifier);
+	fgraph_array[i] = gops;
+	gops->idx = i;
 
 	ftrace_graph_active++;
-	ret = start_graph_tracing();
-	if (ret) {
-		ftrace_graph_active--;
-		goto out;
+
+	if (ftrace_graph_active == 2)
+		ftrace_graph_disable_direct(true);
+
+	if (ftrace_graph_active == 1) {
+		ftrace_graph_enable_direct(false);
+		register_pm_notifier(&ftrace_suspend_notifier);
+		ret = start_graph_tracing();
+		if (ret)
+			goto error;
+		/*
+		 * Some archs just test to see if these are not
+		 * the default function
+		 */
+		ftrace_graph_return = return_run;
+		ftrace_graph_entry = entry_run;
+		command = FTRACE_START_FUNC_RET;
+	} else {
+		init_task_vars(gops->idx);
 	}
 
-	ftrace_graph_return = gops->retfunc;
+	/* Always save the function, and reset at unregistering */
+	gops->saved_func = gops->entryfunc;
 
-	/*
-	 * Update the indirect function to the entryfunc, and the
-	 * function that gets called to the entry_test first. Then
-	 * call the update fgraph entry function to determine if
-	 * the entryfunc should be called directly or not.
-	 */
-	__ftrace_graph_entry = gops->entryfunc;
-	ftrace_graph_entry = ftrace_graph_entry_test;
-	update_function_graph_func();
-
-	ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+	ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
+error:
+	if (ret) {
+		fgraph_array[i] = &fgraph_stub;
+		ftrace_graph_active--;
+		gops->saved_func = NULL;
+		fgraph_lru_release_index(i);
+	}
 out:
 	mutex_unlock(&ftrace_lock);
 	return ret;
@@ -660,19 +1300,41 @@ out:
 
 void unregister_ftrace_graph(struct fgraph_ops *gops)
 {
+	int command = 0;
+
 	mutex_lock(&ftrace_lock);
 
 	if (unlikely(!ftrace_graph_active))
 		goto out;
 
-	ftrace_graph_active--;
-	ftrace_graph_return = ftrace_stub_graph;
-	ftrace_graph_entry = ftrace_graph_entry_stub;
-	__ftrace_graph_entry = ftrace_graph_entry_stub;
-	ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
-	unregister_pm_notifier(&ftrace_suspend_notifier);
-	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+	if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE ||
+		     fgraph_array[gops->idx] != gops))
+		goto out;
+
+	if (fgraph_lru_release_index(gops->idx) < 0)
+		goto out;
+
+	fgraph_array[gops->idx] = &fgraph_stub;
+
+	ftrace_graph_active--;
+
+	if (!ftrace_graph_active)
+		command = FTRACE_STOP_FUNC_RET;
+
+	ftrace_shutdown_subops(&graph_ops, &gops->ops, command);
+
+	if (ftrace_graph_active == 1)
+		ftrace_graph_enable_direct(true);
+	else if (!ftrace_graph_active)
+		ftrace_graph_disable_direct(false);
+
+	if (!ftrace_graph_active) {
+		ftrace_graph_return = ftrace_stub_graph;
+		ftrace_graph_entry = ftrace_graph_entry_stub;
+		unregister_pm_notifier(&ftrace_suspend_notifier);
+		unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+	}
 out:
+	gops->saved_func = NULL;
 	mutex_unlock(&ftrace_lock);
 }
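To illustrate the headline change of this rework (sketch only; latency_ops and depth_ops are hypothetical fgraph_ops instances set up as in the earlier example): two independent users can now be registered at once, each receiving its own fgraph_array slot:

	register_ftrace_graph(&latency_ops);	/* gets idx 0; a single user keeps
						   the static-call fast path      */
	register_ftrace_graph(&depth_ops);	/* gets idx 1; a second user falls
						   back to the fgraph_array loop  */

Before this change, the second call would have failed with -EBUSY.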
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eacab4020508..e5d6a4ab433b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -74,7 +74,8 @@
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_OPS_HASH(opsname)	\
 	.func_hash		= &opsname.local_hash,			\
-	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
+	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \
+	.subop_list		= LIST_HEAD_INIT(opsname.subop_list),
 #else
 #define INIT_OPS_HASH(opsname)
 #endif
@@ -99,7 +100,7 @@ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
 /* What to set function_trace_op to */
 static struct ftrace_ops *set_function_trace_op;
 
-static bool ftrace_pids_enabled(struct ftrace_ops *ops)
+bool ftrace_pids_enabled(struct ftrace_ops *ops)
 {
 	struct trace_array *tr;
 
@@ -121,7 +122,7 @@ static int ftrace_disabled __read_mostly;
 
 DEFINE_MUTEX(ftrace_lock);
 
-struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
+struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = (struct ftrace_ops __rcu *)&ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 struct ftrace_ops global_ops;
 
@@ -161,12 +162,14 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
 #ifdef CONFIG_DYNAMIC_FTRACE
 	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
 		mutex_init(&ops->local_hash.regex_lock);
+		INIT_LIST_HEAD(&ops->subop_list);
 		ops->func_hash = &ops->local_hash;
 		ops->flags |= FTRACE_OPS_FL_INITIALIZED;
 	}
 #endif
 }
 
+/* Call this function when a callback filters on set_ftrace_pid */
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
@@ -235,8 +238,6 @@ static void update_ftrace_function(void)
 		func = ftrace_ops_list_func;
 	}
 
-	update_function_graph_func();
-
 	/* If there's no change, then do nothing more here */
 	if (ftrace_trace_function == func)
 		return;
@@ -310,7 +311,7 @@ static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
 			lockdep_is_held(&ftrace_lock)) == ops &&
 	    rcu_dereference_protected(ops->next,
 			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
-		*list = &ftrace_list_end;
+		rcu_assign_pointer(*list, &ftrace_list_end);
 		return 0;
 	}
 
@@ -406,6 +407,8 @@ static void ftrace_update_pid_func(void)
 		}
 	} while_for_each_ftrace_op(op);
 
+	fgraph_update_pid_func();
+
 	update_ftrace_function();
 }
 
@@ -817,7 +820,8 @@ void ftrace_graph_graph_time_control(bool enable)
 	fgraph_graph_time = enable;
 }
 
-static int profile_graph_entry(struct ftrace_graph_ent *trace)
+static int profile_graph_entry(struct ftrace_graph_ent *trace,
+			       struct fgraph_ops *gops)
 {
 	struct ftrace_ret_stack *ret_stack;
 
@@ -834,7 +838,8 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
 	return 1;
 }
 
-static void profile_graph_return(struct ftrace_graph_ret *trace)
+static void profile_graph_return(struct ftrace_graph_ret *trace,
+				 struct fgraph_ops *gops)
 {
 	struct ftrace_ret_stack *ret_stack;
 	struct ftrace_profile_stat *stat;
@@ -1314,7 +1319,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
 	return hash;
 }
 
-
+/* Used to save filters on functions for modules not loaded yet */
 static int ftrace_add_mod(struct trace_array *tr,
 			  const char *func, const char *module,
 			  int enable)
@@ -1380,15 +1385,17 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 	return NULL;
 }
 
-static void
-ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash);
-static void
-ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash);
+static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops);
+static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops);
 
 static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
 				       struct ftrace_hash *new_hash);
 
-static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
+/*
+ * Allocate a new hash and remove entries from @src and move them to the new hash.
+ * On success, the @src hash will be empty and should be freed.
+ */
+static struct ftrace_hash *__move_hash(struct ftrace_hash *src, int size)
 {
 	struct ftrace_func_entry *entry;
 	struct ftrace_hash *new_hash;
@@ -1424,6 +1431,7 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
 	return new_hash;
 }
 
+/* Move the @src entries to a newly allocated hash */
 static struct ftrace_hash *
 __ftrace_hash_move(struct ftrace_hash *src)
 {
@@ -1435,9 +1443,29 @@ __ftrace_hash_move(struct ftrace_hash *src)
 	if (ftrace_hash_empty(src))
 		return EMPTY_HASH;
 
-	return dup_hash(src, size);
+	return __move_hash(src, size);
 }
 
+/**
+ * ftrace_hash_move - move a new hash to a filter and do updates
+ * @ops: The ops with the hash that @dst points to
+ * @enable: True if for the filter hash, false for the notrace hash
+ * @dst: Points to the @ops hash that should be updated
+ * @src: The hash to update @dst with
+ *
+ * This is called when an ftrace_ops hash is being updated and the
+ * kernel needs to reflect this. Note, this only updates the kernel
+ * function callbacks if the @ops is enabled (not to be confused with
+ * @enable above). If the @ops is enabled, its hash determines what
+ * callbacks get called. This function gets called when the @ops hash
+ * is updated and it requires new callbacks.
+ * + * On success the elements of @src are moved to @dst, @dst is updated + * properly, and the functions determined by the @ops hashes + * will now call the @ops callback function. + * + * Regardless of the return value, @src should be freed with free_ftrace_hash(). + */ static int ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src) @@ -1467,11 +1495,11 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, * Remove the current set, update the hash and add * them back. */ - ftrace_hash_rec_disable_modify(ops, enable); + ftrace_hash_rec_disable_modify(ops); rcu_assign_pointer(*dst, new_hash); - ftrace_hash_rec_enable_modify(ops, enable); + ftrace_hash_rec_enable_modify(ops); return 0; } @@ -1694,12 +1722,21 @@ static bool skip_record(struct dyn_ftrace *rec) !(rec->flags & FTRACE_FL_ENABLED); } +/* + * This is the main engine for the ftrace updates to the dyn_ftrace records. + * + * It will iterate through all the available ftrace functions + * (the ones that ftrace can have callbacks to) and set the flags + * in the associated dyn_ftrace records. + * + * @inc: If true, the functions associated with @ops are added to + * the dyn_ftrace records, otherwise they are removed. + */ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, - int filter_hash, bool inc) { struct ftrace_hash *hash; - struct ftrace_hash *other_hash; + struct ftrace_hash *notrace_hash; struct ftrace_page *pg; struct dyn_ftrace *rec; bool update = false; @@ -1711,35 +1748,16 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, return false; /* - * In the filter_hash case: * If the count is zero, we update all records. * Otherwise we just update the items in the hash. - * - * In the notrace_hash case: - * We enable the update in the hash. - * As disabling notrace means enabling the tracing, - * and enabling notrace means disabling, the inc variable - * gets inversed. */ - if (filter_hash) { - hash = ops->func_hash->filter_hash; - other_hash = ops->func_hash->notrace_hash; - if (ftrace_hash_empty(hash)) - all = true; - } else { - inc = !inc; - hash = ops->func_hash->notrace_hash; - other_hash = ops->func_hash->filter_hash; - /* - * If the notrace hash has no items, - * then there's nothing to do. - */ - if (ftrace_hash_empty(hash)) - return false; - } + hash = ops->func_hash->filter_hash; + notrace_hash = ops->func_hash->notrace_hash; + if (ftrace_hash_empty(hash)) + all = true; do_for_each_ftrace_rec(pg, rec) { - int in_other_hash = 0; + int in_notrace_hash = 0; int in_hash = 0; int match = 0; @@ -1751,26 +1769,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * Only the filter_hash affects all records. * Update if the record is not in the notrace hash. */ - if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) + if (!notrace_hash || !ftrace_lookup_ip(notrace_hash, rec->ip)) match = 1; } else { in_hash = !!ftrace_lookup_ip(hash, rec->ip); - in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); + in_notrace_hash = !!ftrace_lookup_ip(notrace_hash, rec->ip); /* - * If filter_hash is set, we want to match all functions - * that are in the hash but not in the other hash. - * - * If filter_hash is not set, then we are decrementing. - * That means we match anything that is in the hash - * and also in the other_hash. That is, we need to turn - * off functions in the other hash because they are disabled - * by this hash. + * We want to match all functions that are in the hash but + * not in the notrace hash.
*/ - if (filter_hash && in_hash && !in_other_hash) - match = 1; - else if (!filter_hash && in_hash && - (in_other_hash || ftrace_hash_empty(other_hash))) + if (in_hash && !in_notrace_hash) match = 1; } if (!match) @@ -1876,24 +1885,48 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, return update; } -static bool ftrace_hash_rec_disable(struct ftrace_ops *ops, - int filter_hash) +/* + * This is called when an ops is removed from tracing. It will decrement + * the counters of the dyn_ftrace records for all the functions that + * the @ops is attached to. + */ +static bool ftrace_hash_rec_disable(struct ftrace_ops *ops) { - return __ftrace_hash_rec_update(ops, filter_hash, 0); + return __ftrace_hash_rec_update(ops, false); } -static bool ftrace_hash_rec_enable(struct ftrace_ops *ops, - int filter_hash) +/* + * This is called when an ops is added to tracing. It will increment + * the counters of the dyn_ftrace records for all the functions that + * the @ops is attached to. + */ +static bool ftrace_hash_rec_enable(struct ftrace_ops *ops) { - return __ftrace_hash_rec_update(ops, filter_hash, 1); + return __ftrace_hash_rec_update(ops, true); } -static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, - int filter_hash, int inc) +/* + * This function will update what functions @ops traces when its filter + * changes. + * + * The @inc states if the @ops callbacks are going to be added or removed. + * When one of the @ops hashes is updated to a "new_hash" the dyn_ftrace + * records are updated via: + * + * ftrace_hash_rec_disable_modify(ops); + * ops->hash = new_hash + * ftrace_hash_rec_enable_modify(ops); + * + * Where the @ops is removed from all the records it is tracing using + * its old hash. The @ops hash is updated to the new hash, and then + * the @ops is added back to the records so that it is tracing all + * the new functions.
+ */ +static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, bool inc) { struct ftrace_ops *op; - __ftrace_hash_rec_update(ops, filter_hash, inc); + __ftrace_hash_rec_update(ops, inc); if (ops->func_hash != &global_ops.local_hash) return; @@ -1907,20 +1940,18 @@ static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, if (op == ops) continue; if (op->func_hash == &global_ops.local_hash) - __ftrace_hash_rec_update(op, filter_hash, inc); + __ftrace_hash_rec_update(op, inc); } while_for_each_ftrace_op(op); } -static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, - int filter_hash) +static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops) { - ftrace_hash_rec_update_modify(ops, filter_hash, 0); + ftrace_hash_rec_update_modify(ops, false); } -static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, - int filter_hash) +static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops) { - ftrace_hash_rec_update_modify(ops, filter_hash, 1); + ftrace_hash_rec_update_modify(ops, true); } /* @@ -3043,7 +3074,7 @@ int ftrace_startup(struct ftrace_ops *ops, int command) return ret; } - if (ftrace_hash_rec_enable(ops, 1)) + if (ftrace_hash_rec_enable(ops)) command |= FTRACE_UPDATE_CALLS; ftrace_startup_enable(command); @@ -3085,7 +3116,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) /* Disabling ipmodify never fails */ ftrace_hash_ipmodify_disable(ops); - if (ftrace_hash_rec_disable(ops, 1)) + if (ftrace_hash_rec_disable(ops)) command |= FTRACE_UPDATE_CALLS; ops->flags &= ~FTRACE_OPS_FL_ENABLED; @@ -3164,6 +3195,474 @@ out: return 0; } +/* Simply make a copy of @src and return it */ +static struct ftrace_hash *copy_hash(struct ftrace_hash *src) +{ + if (ftrace_hash_empty(src)) + return EMPTY_HASH; + + return alloc_and_copy_ftrace_hash(src->size_bits, src); +} + +/* + * Append @new_hash entries to @hash: + * + * If @hash is the EMPTY_HASH then it traces all functions and nothing + * needs to be done. + * + * If @new_hash is the EMPTY_HASH, then make *hash the EMPTY_HASH so + * that it traces everything. + * + * Otherwise, go through all of @new_hash and add anything that @hash + * doesn't already have, to @hash. + * + * The filter_hash updates use just the append_hash() function + * and the notrace_hash does not. + */ +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + + /* If new_hash has everything make hash have everything */ + if (ftrace_hash_empty(new_hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; + } + + size = 1 << new_hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash->buckets[i], hlist) { + /* Only add if not already in hash */ + if (!__ftrace_lookup_ip(*hash, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + return 0; +}
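A userspace illustration (not kernel code; the bitmask model and all names are hypothetical) of the set semantics that append_hash() above and intersect_hash() below give a manager ops: an empty filter set means "trace everything", an empty notrace set means "notrace nothing", filters combine by union and notrace hashes by intersection:

	#include <stdio.h>

	int main(void)
	{
		/* one bit per function: subops 1 traces {A,B}, subops 2 traces {B,C} */
		unsigned int sub1_filter = 0x3, sub2_filter = 0x6;
		/* subops 1 notraces {C}, subops 2 notraces {B,C} */
		unsigned int sub1_notrace = 0x4, sub2_notrace = 0x6;

		/* an empty filter traces all, so the union only applies when both are set */
		unsigned int filter = (!sub1_filter || !sub2_filter) ?
					0 : (sub1_filter | sub2_filter);
		/* only functions that *all* subops notrace may stay untraced */
		unsigned int notrace = (!sub1_notrace || !sub2_notrace) ?
					0 : (sub1_notrace & sub2_notrace);

		printf("manager: filter=0x%x notrace=0x%x\n", filter, notrace);
		return 0;	/* prints filter=0x7 notrace=0x4 */
	}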
+ +/* + * Add to @hash only those that are in both @new_hash1 and @new_hash2 + * + * The notrace_hash updates use just the intersect_hash() function + * and the filter_hash does not. + */ +static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash1, + struct ftrace_hash *new_hash2) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + /* + * If new_hash1 or new_hash2 is the EMPTY_HASH then make the hash + * empty as well, as an empty notrace hash means nothing is notraced. + */ + if (ftrace_hash_empty(new_hash1) || ftrace_hash_empty(new_hash2)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; + } + + size = 1 << new_hash1->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash1->buckets[i], hlist) { + /* Only add if in both @new_hash1 and @new_hash2 */ + if (__ftrace_lookup_ip(new_hash2, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + /* If nothing intersects, make it the empty set */ + if (ftrace_hash_empty(*hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + } + return 0; +} + +/* Return a new hash that has the union of all @ops->filter_hash entries */ +static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) +{ + struct ftrace_hash *new_hash; + struct ftrace_ops *subops; + int ret; + + new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits); + if (!new_hash) + return NULL; + + list_for_each_entry(subops, &ops->subop_list, list) { + ret = append_hash(&new_hash, subops->func_hash->filter_hash); + if (ret < 0) { + free_ftrace_hash(new_hash); + return NULL; + } + /* Nothing more to do if new_hash is empty */ + if (ftrace_hash_empty(new_hash)) + break; + } + return new_hash; +} + +/* Make @ops trace everything except what all its subops do not trace */ +static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops) +{ + struct ftrace_hash *new_hash = NULL; + struct ftrace_ops *subops; + int size_bits; + int ret; + + list_for_each_entry(subops, &ops->subop_list, list) { + struct ftrace_hash *next_hash; + + if (!new_hash) { + size_bits = subops->func_hash->notrace_hash->size_bits; + new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash); + if (!new_hash) + return NULL; + continue; + } + size_bits = new_hash->size_bits; + next_hash = new_hash; + new_hash = alloc_ftrace_hash(size_bits); + ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash); + free_ftrace_hash(next_hash); + if (ret < 0) { + free_ftrace_hash(new_hash); + return NULL; + } + /* Nothing more to do if new_hash is empty */ + if (ftrace_hash_empty(new_hash)) + break; + } + return new_hash; +} + +static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + if (ftrace_hash_empty(A)) + return ftrace_hash_empty(B); + + if (ftrace_hash_empty(B)) + return ftrace_hash_empty(A); + + if (A->count != B->count) + return false; + + size = 1 << A->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &A->buckets[i], hlist) { + if (!__ftrace_lookup_ip(B, entry->ip)) + return false; + } + } + + return true; +} + +static void ftrace_ops_update_code(struct ftrace_ops *ops, + struct ftrace_ops_hash *old_hash); + +static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops_hash old_hash_ops; + struct ftrace_hash *old_hash; + int ret; + + old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret) { + ftrace_ops_update_code(ops, &old_hash_ops); + free_ftrace_hash_rcu(old_hash); + } + return ret; +} + +static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_hash, + struct ftrace_hash *notrace_hash) +{ + int ret; + + if (!ops_equal(filter_hash, ops->func_hash->filter_hash)) { + ret =
__ftrace_hash_move_and_update_ops(ops, &ops->func_hash->filter_hash, + filter_hash, 1); + if (ret < 0) + return ret; + } + + if (!ops_equal(notrace_hash, ops->func_hash->notrace_hash)) { + ret = __ftrace_hash_move_and_update_ops(ops, &ops->func_hash->notrace_hash, + notrace_hash, 0); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * ftrace_startup_subops - enable tracing for subops of an ops + * @ops: Manager ops (used to pick all the functions of its subops) + * @subops: A new ops to add to @ops + * @command: Extra commands to use to enable tracing + * + * The @ops is a manager @ops that has the filter that includes all the functions + * that its list of subops are tracing. Adding a new @subops will add the + * functions of @subops to @ops. + */ +int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *save_filter_hash; + struct ftrace_hash *save_notrace_hash; + int size_bits; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + ftrace_ops_init(ops); + ftrace_ops_init(subops); + + if (WARN_ON_ONCE(subops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; + + /* Make everything canonical (Just in case!) */ + if (!ops->func_hash->filter_hash) + ops->func_hash->filter_hash = EMPTY_HASH; + if (!ops->func_hash->notrace_hash) + ops->func_hash->notrace_hash = EMPTY_HASH; + if (!subops->func_hash->filter_hash) + subops->func_hash->filter_hash = EMPTY_HASH; + if (!subops->func_hash->notrace_hash) + subops->func_hash->notrace_hash = EMPTY_HASH; + + /* For the first subops to ops just enable it normally */ + if (list_empty(&ops->subop_list)) { + /* Just use the subops hashes */ + filter_hash = copy_hash(subops->func_hash->filter_hash); + notrace_hash = copy_hash(subops->func_hash->notrace_hash); + if (!filter_hash || !notrace_hash) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return -ENOMEM; + } + + save_filter_hash = ops->func_hash->filter_hash; + save_notrace_hash = ops->func_hash->notrace_hash; + + ops->func_hash->filter_hash = filter_hash; + ops->func_hash->notrace_hash = notrace_hash; + list_add(&subops->list, &ops->subop_list); + ret = ftrace_startup(ops, command); + if (ret < 0) { + list_del(&subops->list); + ops->func_hash->filter_hash = save_filter_hash; + ops->func_hash->notrace_hash = save_notrace_hash; + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + } else { + free_ftrace_hash(save_filter_hash); + free_ftrace_hash(save_notrace_hash); + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; + } + return ret; + } + + /* + * Here there's already something attached. 
Here are the rules: + * o If either filter_hash is empty then the final stays empty + * o Otherwise, the final is a superset of both hashes + * o If either notrace_hash is empty then the final stays empty + * o Otherwise, the final is an intersection between the hashes + */ + if (ftrace_hash_empty(ops->func_hash->filter_hash) || + ftrace_hash_empty(subops->func_hash->filter_hash)) { + filter_hash = EMPTY_HASH; + } else { + size_bits = max(ops->func_hash->filter_hash->size_bits, + subops->func_hash->filter_hash->size_bits); + filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); + if (!filter_hash) + return -ENOMEM; + ret = append_hash(&filter_hash, subops->func_hash->filter_hash); + if (ret < 0) { + free_ftrace_hash(filter_hash); + return ret; + } + } + + if (ftrace_hash_empty(ops->func_hash->notrace_hash) || + ftrace_hash_empty(subops->func_hash->notrace_hash)) { + notrace_hash = EMPTY_HASH; + } else { + size_bits = max(ops->func_hash->notrace_hash->size_bits, + subops->func_hash->notrace_hash->size_bits); + notrace_hash = alloc_ftrace_hash(size_bits); + if (!notrace_hash) { + free_ftrace_hash(filter_hash); + return -ENOMEM; + } + + ret = intersect_hash(&notrace_hash, ops->func_hash->notrace_hash, + subops->func_hash->notrace_hash); + if (ret < 0) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return ret; + } + } + + list_add(&subops->list, &ops->subop_list); + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + if (ret < 0) { + list_del(&subops->list); + } else { + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; + } + return ret; +}
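For reference, a minimal sketch of the intended calling pattern; manager_ops, subops_a and subops_b are hypothetical, but the flow mirrors how register_ftrace_graph() above hands graph_ops and each gops->ops to this interface, paired with ftrace_shutdown_subops() just below:

	/* each call folds the subops hashes into the manager's hashes */
	ret = ftrace_startup_subops(&manager_ops, &subops_a, 0);
	if (!ret)
		ret = ftrace_startup_subops(&manager_ops, &subops_b, 0);

	/* manager_ops now traces the union of both subops' filters */

	/* removal rebuilds the manager hashes from the remaining subops */
	ftrace_shutdown_subops(&manager_ops, &subops_a, 0);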
+ +/** + * ftrace_shutdown_subops - Remove a subops from a manager ops + * @ops: A manager ops to remove @subops from + * @subops: The subops to remove from @ops + * @command: Any extra command flags to add to modifying the text + * + * Removes the functions being traced by the @subops from @ops. Note, it + * will not affect functions that are being traced by other subops that + * still exist in @ops. + * + * If the last subops is removed from @ops, then @ops is shutdown normally. + */ +int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + if (WARN_ON_ONCE(!(subops->flags & FTRACE_OPS_FL_ENABLED))) + return -EINVAL; + + list_del(&subops->list); + + if (list_empty(&ops->subop_list)) { + /* Last one, just disable the current ops */ + + ret = ftrace_shutdown(ops, command); + if (ret < 0) { + list_add(&subops->list, &ops->subop_list); + return ret; + } + + free_ftrace_hash(ops->func_hash->filter_hash); + free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; + + return 0; + } + + /* Rebuild the hashes from the remaining subops */ + filter_hash = append_hashes(ops); + notrace_hash = intersect_hashes(ops); + if (!filter_hash || !notrace_hash) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + list_add(&subops->list, &ops->subop_list); + return -ENOMEM; + } + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + if (ret < 0) { + list_add(&subops->list, &ops->subop_list); + } else { + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; + } + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return ret; +} + +static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, + struct ftrace_hash **orig_subhash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops *ops = subops->managed; + struct ftrace_hash **orig_hash; + struct ftrace_hash *save_hash; + struct ftrace_hash *new_hash; + int ret; + + /* Manager ops cannot be subops (yet) */ + if (WARN_ON_ONCE(!ops || ops->flags & FTRACE_OPS_FL_SUBOP)) + return -EINVAL; + + /* Move the new hash over to the subops hash */ + save_hash = *orig_subhash; + *orig_subhash = __ftrace_hash_move(hash); + if (!*orig_subhash) { + *orig_subhash = save_hash; + return -ENOMEM; + } + + /* Create a new_hash to hold the ops' new functions */ + if (enable) { + orig_hash = &ops->func_hash->filter_hash; + new_hash = append_hashes(ops); + } else { + orig_hash = &ops->func_hash->notrace_hash; + new_hash = intersect_hashes(ops); + } + + /* Move the hash over to the new hash */ + ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); + + free_ftrace_hash(new_hash); + + if (ret) { + /* Put back the original hash */ + free_ftrace_hash_rcu(*orig_subhash); + *orig_subhash = save_hash; + } else { + free_ftrace_hash_rcu(save_hash); + } + return ret; +} + + static u64 ftrace_update_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; @@ -4380,19 +4879,33 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, struct ftrace_hash *hash, int enable) { - struct ftrace_ops_hash old_hash_ops; - struct ftrace_hash *old_hash; - int ret; + if (ops->flags & FTRACE_OPS_FL_SUBOP) + return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable); - old_hash = *orig_hash; - old_hash_ops.filter_hash = ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; - ret = ftrace_hash_move(ops, enable, orig_hash, hash); - if (!ret) { - ftrace_ops_update_code(ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); + /* + * If this ops is not enabled, it could be sharing its filters + * with a subop.
If that's the case, update the subop instead of + * this ops. Shared filters are only allowed to have one ops set + * at a time, and if we update the ops that is not enabled, + * it will not affect subops that share it. + */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) { + struct ftrace_ops *op; + + /* Check if any other manager subops maps to this hash */ + do_for_each_ftrace_op(op, ftrace_ops_list) { + struct ftrace_ops *subops; + + list_for_each_entry(subops, &op->subop_list, list) { + if ((subops->flags & FTRACE_OPS_FL_ENABLED) && + subops->func_hash == ops->func_hash) { + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable); + } + } + } while_for_each_ftrace_op(op); } - return ret; + + return __ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); } static bool module_exists(const char *module) @@ -5475,6 +5988,8 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct); * unregister_ftrace_direct - Remove calls to custom trampoline * previously registered by register_ftrace_direct for @ops object. * @ops: The address of the struct ftrace_ops object + * @addr: The address of the direct function that is called by the @ops functions + * @free_filters: Set to true to remove all filters for the ftrace_ops, false otherwise * * This is used to remove a direct calls to @addr from the nop locations * of the functions registered in @ops (with by ftrace_set_filter_ip @@ -7324,6 +7839,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr) tr->ops = &global_ops; tr->ops->private = tr; ftrace_init_trace_array(tr); + init_array_fgraph_ops(tr, tr->ops); } void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index 5012c04f92c0..3235470e61b3 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -15,6 +15,8 @@ extern struct ftrace_ops global_ops; int ftrace_startup(struct ftrace_ops *ops, int command); int ftrace_shutdown(struct ftrace_ops *ops, int command); int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs); +int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command); +int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command); #else /* !CONFIG_DYNAMIC_FTRACE */ @@ -38,14 +40,26 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { return 1; } +static inline int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + return -EINVAL; +} +static inline int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + return -EINVAL; +} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern int ftrace_graph_active; -void update_function_graph_func(void); +# ifdef CONFIG_DYNAMIC_FTRACE +extern void fgraph_update_pid_func(void); +# else +static inline void fgraph_update_pid_func(void) {} +# endif #else /* !CONFIG_FUNCTION_GRAPH_TRACER */ # define ftrace_graph_active 0 -static inline void update_function_graph_func(void) { } +static inline void fgraph_update_pid_func(void) {} #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #else /* !CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 749a182dab48..8783bebd0562 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -397,6 +397,9 @@ struct trace_array { struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER + struct fgraph_ops *gops; +#endif #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; @@ -679,9 +682,8 @@ void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); -void trace_graph_return(struct ftrace_graph_ret *trace); -int trace_graph_entry(struct ftrace_graph_ent *trace); -void set_graph_array(struct trace_array *tr); +void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -892,12 +894,59 @@ extern int __trace_graph_entry(struct trace_array *tr, extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); +extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); +extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); +extern void free_fgraph_ops(struct trace_array *tr); + +enum { + TRACE_GRAPH_FL = 1, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_FL + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. + */ + TRACE_GRAPH_NOTRACE_BIT, +}; + +#define TRACE_GRAPH_NOTRACE (1 << TRACE_GRAPH_NOTRACE_BIT) + +static inline unsigned long ftrace_graph_depth(unsigned long *task_var) +{ + return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3; +} + +static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth) +{ + *task_var &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT); + *task_var |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT; +} #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) +static inline int +ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; @@ -919,13 +968,12 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) } if (ftrace_lookup_ip(hash, addr)) { - /* * This needs to be cleared on the return functions * when the depth is zero. 
*/ - trace_recursion_set(TRACE_GRAPH_BIT); - trace_recursion_set_depth(trace->depth); + *task_var |= TRACE_GRAPH_FL; + ftrace_graph_set_depth(task_var, trace->depth); /* * If no irqs are to be traced, but a set_graph_function @@ -944,11 +992,14 @@ out: return ret; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void +ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { - if (trace_recursion_test(TRACE_GRAPH_BIT) && - trace->depth == trace_recursion_depth()) - trace_recursion_clear(TRACE_GRAPH_BIT); + unsigned long *task_var = fgraph_get_task_var(gops); + + if ((*task_var & TRACE_GRAPH_FL) && + trace->depth == ftrace_graph_depth(task_var)) + *task_var &= ~TRACE_GRAPH_FL; } static inline int ftrace_graph_notrace_addr(unsigned long addr) @@ -974,7 +1025,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) return ret; } #else -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) +static inline int ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { return 1; } @@ -983,27 +1034,37 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; -static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) +static inline bool +ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) { + unsigned long *task_var = fgraph_get_task_var(gops); + /* trace it when it is-nested-in or is a function enabled. */ - return !(trace_recursion_test(TRACE_GRAPH_BIT) || - ftrace_graph_addr(trace)) || + return !((*task_var & TRACE_GRAPH_FL) || + ftrace_graph_addr(task_var, trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops); + #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } +static inline void free_fgraph_ops(struct trace_array *tr) { } +/* ftrace_ops may not be defined */ +#define init_array_fgraph_ops(tr, ops) do { } while (0) +#define allocate_fgraph_ops(tr, ops) ({ 0; }) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9f1bfbe105e8..3b0cea37e029 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -80,6 +80,7 @@ void ftrace_free_ftrace_ops(struct trace_array *tr) int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { + int ret; /* * The top level array uses the "global_ops", and the files are * created on boot up. 
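For orientation before the conversions below, a minimal, hypothetical user of the new two-argument callback signatures; only the fgraph_ops interface visible in this series is assumed:

	static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
	{
		/* per-user state now rides in gops (e.g. gops->private), not globals */
		return 1;	/* non-zero: also invoke the return callback */
	}

	static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
	{
	}

	static struct fgraph_ops my_gops = {
		.entryfunc = my_entry,
		.retfunc = my_return,
	};

	/* paired with register_ftrace_graph(&my_gops) and
	   unregister_ftrace_graph(&my_gops) */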
@@ -90,6 +91,12 @@ int ftrace_create_function_files(struct trace_array *tr, if (!tr->ops) return -EINVAL; + ret = allocate_fgraph_ops(tr, tr->ops); + if (ret) { + kfree(tr->ops); + return ret; + } + ftrace_create_filter_files(tr->ops, parent); return 0; @@ -99,6 +106,7 @@ void ftrace_destroy_function_files(struct trace_array *tr) { ftrace_destroy_filter_files(tr->ops); ftrace_free_ftrace_ops(tr); + free_fgraph_ops(tr); } static ftrace_func_t select_trace_function(u32 flags_val) @@ -223,6 +231,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, long disabled; int cpu; unsigned int trace_ctx; + int skip = STACK_SKIP; if (unlikely(!tr->function_enabled)) return; @@ -239,7 +248,11 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); trace_function(tr, ip, parent_ip, trace_ctx); - __trace_stack(tr, trace_ctx, STACK_SKIP); +#ifdef CONFIG_UNWINDER_FRAME_POINTER + if (ftrace_pids_enabled(op)) + skip++; +#endif + __trace_stack(tr, trace_ctx, skip); } atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c35fbaab2a47..13d0387ac6a6 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -83,8 +83,6 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; -static struct trace_array *graph_array; - /* * DURATION column is being also used to display IRQ signs, * following values are used by print_graph_irq and others @@ -129,9 +127,11 @@ static inline int ftrace_graph_ignore_irqs(void) return in_hardirq(); } -int trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { - struct trace_array *tr = graph_array; + unsigned long *task_var = fgraph_get_task_var(gops); + struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; unsigned int trace_ctx; @@ -139,7 +139,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int ret; int cpu; - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) + if (*task_var & TRACE_GRAPH_NOTRACE) return 0; /* @@ -150,7 +150,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT); + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit.
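A worked example of the per-task flag word manipulated above, following the TRACE_GRAPH_* definitions added to trace.h earlier in this patch (TRACE_GRAPH_FL is bit 0, the depth lives in bits 2-3, TRACE_GRAPH_NOTRACE is bit 4):

	unsigned long task_var = 0;

	task_var |= TRACE_GRAPH_FL;		/* task_var == 0x01 */
	ftrace_graph_set_depth(&task_var, 2);	/* task_var == 0x09 (2 << 2) */
	/* ftrace_graph_depth(&task_var) == 2 */
	task_var |= TRACE_GRAPH_NOTRACE;	/* task_var == 0x19 */
	task_var &= ~TRACE_GRAPH_NOTRACE;	/* back to 0x09, as on return above */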
@@ -161,7 +161,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (!ftrace_trace_task(tr)) return 0; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; if (ftrace_graph_ignore_irqs()) @@ -238,19 +238,21 @@ void __trace_graph_return(struct trace_array *tr, trace_buffer_unlock_commit_nostack(buffer, event); } -void trace_graph_return(struct ftrace_graph_ret *trace) +void trace_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { - struct trace_array *tr = graph_array; + unsigned long *task_var = fgraph_get_task_var(gops); + struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; unsigned int trace_ctx; long disabled; int cpu; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { - trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); + if (*task_var & TRACE_GRAPH_NOTRACE) { + *task_var &= ~TRACE_GRAPH_NOTRACE; return; } @@ -266,18 +268,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_restore(flags); } -void set_graph_array(struct trace_array *tr) +static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { - graph_array = tr; - - /* Make graph_array visible before we start tracing */ - - smp_mb(); -} - -static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) -{ - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); @@ -288,28 +282,60 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) (trace->rettime - trace->calltime < tracing_thresh)) return; else - trace_graph_return(trace); + trace_graph_return(trace, gops); } -static struct fgraph_ops funcgraph_thresh_ops = { - .entryfunc = &trace_graph_entry, - .retfunc = &trace_graph_thresh_return, -}; - static struct fgraph_ops funcgraph_ops = { .entryfunc = &trace_graph_entry, .retfunc = &trace_graph_return, }; +int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops) +{ + struct fgraph_ops *gops; + + gops = kzalloc(sizeof(*gops), GFP_KERNEL); + if (!gops) + return -ENOMEM; + + gops->entryfunc = &trace_graph_entry; + gops->retfunc = &trace_graph_return; + + tr->gops = gops; + gops->private = tr; + + fgraph_init_ops(&gops->ops, ops); + + return 0; +} + +void free_fgraph_ops(struct trace_array *tr) +{ + kfree(tr->gops); +} + +__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops) +{ + tr->gops = &funcgraph_ops; + funcgraph_ops.private = tr; + fgraph_init_ops(&tr->gops->ops, ops); +} + static int graph_trace_init(struct trace_array *tr) { int ret; - set_graph_array(tr); + tr->gops->entryfunc = trace_graph_entry; + if (tracing_thresh) - ret = register_ftrace_graph(&funcgraph_thresh_ops); + tr->gops->retfunc = trace_graph_thresh_return; else - ret = register_ftrace_graph(&funcgraph_ops); + tr->gops->retfunc = trace_graph_return; + + /* Make sure the gops functions are visible before we start tracing */ + smp_mb(); + + ret = register_ftrace_graph(tr->gops); if (ret) return ret; tracing_start_cmdline_record(); @@ -320,10 +346,7 @@ static int graph_trace_init(struct trace_array *tr) static void graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); - if (tracing_thresh) - unregister_ftrace_graph(&funcgraph_thresh_ops); - else - unregister_ftrace_graph(&funcgraph_ops); + unregister_ftrace_graph(tr->gops); } static
int graph_trace_update_thresh(struct trace_array *tr) @@ -1362,6 +1385,7 @@ static struct tracer graph_trace __tracer_data = { .print_header = print_graph_headers, .flags = &tracer_flags, .set_flag = func_graph_set_flag, + .allow_instances = true, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function_graph, #endif diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ba37f768e2f2..fce064e20570 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -175,7 +175,8 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) return start_irqsoff_tracer(irqsoff_trace, set); } -static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; @@ -183,7 +184,7 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) unsigned int trace_ctx; int ret; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; /* * Do not trace a function if it's filtered by set_graph_notrace. @@ -205,14 +206,15 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) return ret; } -static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +static void irqsoff_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; unsigned int trace_ctx; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (!func_prolog_dec(tr, &data, &flags)) return; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0469a04a355f..130ca7e7787e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -112,14 +112,15 @@ static int wakeup_display_graph(struct trace_array *tr, int set) return start_func_tracer(tr, set); } -static int wakeup_graph_entry(struct ftrace_graph_ent *trace) +static int wakeup_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned int trace_ctx; int ret = 0; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; /* * Do not trace a function if it's filtered by set_graph_notrace. 
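The selftest added below exercises fgraph's per-call data storage; a condensed sketch of the round-trip it performs, using fgraph_reserve_data()/fgraph_retrieve_data() exactly as the selftest's store_entry()/store_return() do (sketch_entry/sketch_return are hypothetical names):

	static int sketch_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
	{
		int *p = fgraph_reserve_data(gops->idx, sizeof(*p));

		if (!p)
			return 0;	/* no room: skip the return callback */
		*p = 42;		/* travels with this function's frame */
		return 1;
	}

	static void sketch_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
	{
		int size;
		int *p = fgraph_retrieve_data(gops->idx, &size);

		if (p)			/* same data, seen again at function exit */
			WARN_ON(*p != 42);
	}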
@@ -141,13 +142,14 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) return ret; } -static void wakeup_graph_return(struct ftrace_graph_ret *trace) +static void wakeup_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned int trace_ctx; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index e9c5058a8efd..97f1e4bc47dc 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -756,13 +756,262 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE + +#define CHAR_NUMBER 123 +#define SHORT_NUMBER 12345 +#define WORD_NUMBER 1234567890 +#define LONG_NUMBER 1234567890123456789LL +#define ERRSTR_BUFLEN 128 + +struct fgraph_fixture { + struct fgraph_ops gops; + int store_size; + const char *store_type_name; + char error_str_buf[ERRSTR_BUFLEN]; + char *error_str; +}; + +static __init int store_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) +{ + struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); + const char *type = fixture->store_type_name; + int size = fixture->store_size; + void *p; + + p = fgraph_reserve_data(gops->idx, size); + if (!p) { + snprintf(fixture->error_str_buf, ERRSTR_BUFLEN, + "Failed to reserve %s\n", type); + return 0; + } + + switch (size) { + case 1: + *(char *)p = CHAR_NUMBER; + break; + case 2: + *(short *)p = SHORT_NUMBER; + break; + case 4: + *(int *)p = WORD_NUMBER; + break; + case 8: + *(long long *)p = LONG_NUMBER; + break; + } + + return 1; +} + +static __init void store_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) +{ + struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); + const char *type = fixture->store_type_name; + long long expect = 0; + long long found = -1; + int size; + char *p; + + p = fgraph_retrieve_data(gops->idx, &size); + if (!p) { + snprintf(fixture->error_str_buf, ERRSTR_BUFLEN, + "Failed to retrieve %s\n", type); + return; + } + if (fixture->store_size > size) { + snprintf(fixture->error_str_buf, ERRSTR_BUFLEN, + "Retrieved size %d is smaller than expected %d\n", + size, (int)fixture->store_size); + return; + } + + switch (fixture->store_size) { + case 1: + expect = CHAR_NUMBER; + found = *(char *)p; + break; + case 2: + expect = SHORT_NUMBER; + found = *(short *)p; + break; + case 4: + expect = WORD_NUMBER; + found = *(int *)p; + break; + case 8: + expect = LONG_NUMBER; + found = *(long long *)p; + break; + } + + if (found != expect) { + snprintf(fixture->error_str_buf, ERRSTR_BUFLEN, + "%s returned not %lld but %lld\n", type, expect, found); + return; + } + fixture->error_str = NULL; +} + +static int __init init_fgraph_fixture(struct fgraph_fixture *fixture) +{ + char *func_name; + int len; + + snprintf(fixture->error_str_buf, ERRSTR_BUFLEN, + "Failed to execute storage %s\n", fixture->store_type_name); + fixture->error_str = fixture->error_str_buf; + + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + return ftrace_set_filter(&fixture->gops.ops, func_name, len, 1); +} + +/* Test fgraph storage for each size */ +static int __init test_graph_storage_single(struct fgraph_fixture *fixture) +{ + int size = fixture->store_size; + int ret; + 
+ pr_cont("PASSED\n"); + pr_info("Testing fgraph storage of %d byte%s: ", size, str_plural(size)); + + ret = init_fgraph_fixture(fixture); + if (ret && ret != -ENODEV) { + pr_cont("*Could not set filter* "); + return -1; + } + + ret = register_ftrace_graph(&fixture->gops); + if (ret) { + pr_warn("Failed to init store_bytes fgraph tracing\n"); + return -1; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_graph(&fixture->gops); + + if (fixture->error_str) { + pr_cont("*** %s ***", fixture->error_str); + return -1; + } + + return 0; +} + +static struct fgraph_fixture store_bytes[4] __initdata = { + [0] = { + .gops = { + .entryfunc = store_entry, + .retfunc = store_return, + }, + .store_size = 1, + .store_type_name = "byte", + }, + [1] = { + .gops = { + .entryfunc = store_entry, + .retfunc = store_return, + }, + .store_size = 2, + .store_type_name = "short", + }, + [2] = { + .gops = { + .entryfunc = store_entry, + .retfunc = store_return, + }, + .store_size = 4, + .store_type_name = "word", + }, + [3] = { + .gops = { + .entryfunc = store_entry, + .retfunc = store_return, + }, + .store_size = 8, + .store_type_name = "long long", + }, +}; + +static __init int test_graph_storage_multi(void) +{ + struct fgraph_fixture *fixture; + bool printed = false; + int i, ret; + + pr_cont("PASSED\n"); + pr_info("Testing multiple fgraph storage on a function: "); + + for (i = 0; i < ARRAY_SIZE(store_bytes); i++) { + fixture = &store_bytes[i]; + ret = init_fgraph_fixture(fixture); + if (ret && ret != -ENODEV) { + pr_cont("*Could not set filter* "); + printed = true; + goto out; + } + + ret = register_ftrace_graph(&fixture->gops); + if (ret) { + pr_warn("Failed to init store_bytes fgraph tracing\n"); + printed = true; + goto out; + } + } + + DYN_FTRACE_TEST_NAME(); +out: + while (--i >= 0) { + fixture = &store_bytes[i]; + unregister_ftrace_graph(&fixture->gops); + + if (fixture->error_str && !printed) { + pr_cont("*** %s ***", fixture->error_str); + printed = true; + } + } + return printed ? 
-1 : 0; +} + +/* Test the storage passed across function_graph entry and return */ +static __init int test_graph_storage(void) +{ + int ret; + + ret = test_graph_storage_single(&store_bytes[0]); + if (ret) + return ret; + ret = test_graph_storage_single(&store_bytes[1]); + if (ret) + return ret; + ret = test_graph_storage_single(&store_bytes[2]); + if (ret) + return ret; + ret = test_graph_storage_single(&store_bytes[3]); + if (ret) + return ret; + ret = test_graph_storage_multi(); + if (ret) + return ret; + return 0; +} +#else +static inline int test_graph_storage(void) { return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + /* Maximum number of functions to trace before diagnosing a hang */ #define GRAPH_MAX_FUNC_TEST 100000000 static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ -static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) +static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -776,7 +1025,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) return 0; } - return trace_graph_entry(trace); + return trace_graph_entry(trace, gops); } static struct fgraph_ops fgraph_ops __initdata = { @@ -812,7 +1061,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * to detect and recover from possible hangs */ tracing_reset_online_cpus(&tr->array_buffer); - set_graph_array(tr); + fgraph_ops.private = tr; ret = register_ftrace_graph(&fgraph_ops); if (ret) { warn_failed_init_tracer(trace, ret); @@ -855,7 +1104,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, cond_resched(); tracing_reset_online_cpus(&tr->array_buffer); - set_graph_array(tr); + fgraph_ops.private = tr; /* * Some archs *cough*PowerPC*cough* add characters to the @@ -912,6 +1161,8 @@ trace_selftest_startup_function_graph(struct tracer *trace, ftrace_set_global_filter(NULL, 0, 1); #endif + ret = test_graph_storage(); + /* Don't test dynamic tracing, the function tracer already did */ out: /* Stop it if we failed */ diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc new file mode 100644 index 000000000000..ff88f97e41fb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc @@ -0,0 +1,103 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" +INSTANCE3="instances/test3_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi + if [ -d $INSTANCE3 ]; then + echo nop > $INSTANCE3/current_tracer + rmdir $INSTANCE3 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 +mkdir $INSTANCE3 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +do_test() { + REGEX=$1 + TEST=$2 + + # filter something, schedule is always good + if ! 
echo "$REGEX" > set_ftrace_filter; then + fail "can not enable filter $REGEX" + fi + + echo > trace + echo function_graph > current_tracer + enable_tracing + sleep 1 + # search for functions (has "{" or ";" on the line) + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep -v "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -ne 0 ]; then + fail "Graph filtering not working by itself against $TEST?" + fi + + # Make sure we did find something + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -eq 0 ]; then + fail "No traces found with $TEST?" + fi +} + +do_test '*sched*' 'sched' +cd $INSTANCE1 +do_test '*lock*' 'lock' +cd $WD +cd $INSTANCE2 +do_test '*rcu*' 'rcu' +cd $WD +cd $INSTANCE3 +echo function_graph > current_tracer + +sleep 1 +count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` +if [ $count -eq 0 ]; then + fail "No traces found with all tracing?" +fi + +cd $WD +echo nop > current_tracer +echo nop > $INSTANCE1/current_tracer +echo nop > $INSTANCE2/current_tracer +echo nop > $INSTANCE3/current_tracer + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 2f7211254529..8dcce001881d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -8,12 +8,18 @@ # Also test it on an instance directory do_function_fork=1 +do_funcgraph_proc=1 if [ ! -f options/function-fork ]; then do_function_fork=0 echo "no option for function-fork found. Option will not be tested." fi +if [ ! -f options/funcgraph-proc ]; then + do_funcgraph_proc=0 + echo "no option for function-fork found. Option will not be tested." +fi + read PID _ < /proc/self/stat if [ $do_function_fork -eq 1 ]; then @@ -21,12 +27,19 @@ if [ $do_function_fork -eq 1 ]; then orig_value=`grep function-fork trace_options` fi +if [ $do_funcgraph_proc -eq 1 ]; then + orig_value2=`cat options/funcgraph-proc` + echo 1 > options/funcgraph-proc +fi + do_reset() { - if [ $do_function_fork -eq 0 ]; then - return + if [ $do_function_fork -eq 1 ]; then + echo $orig_value > trace_options fi - echo $orig_value > trace_options + if [ $do_funcgraph_proc -eq 1 ]; then + echo $orig_value2 > options/funcgraph-proc + fi } fail() { # msg @@ -36,13 +49,15 @@ fail() { # msg } do_test() { + TRACER=$1 + disable_tracing echo do_execve* > set_ftrace_filter echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid - echo function > current_tracer + echo $TRACER > current_tracer if [ $do_function_fork -eq 1 ]; then # don't allow children to be traced @@ -82,7 +97,11 @@ do_test() { fi } -do_test +do_test function +if grep -s function_graph available_tracers; then + do_test function_graph +fi + do_reset exit 0