From 71566a0d161edec70361b7f90f6e54af6a6d5d05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 12:57:20 +0100 Subject: [PATCH 1/6] tracing/fastboot: Enable boot tracing only during initcalls Impact: modify boot tracer We used to disable the initcall tracing at a specified time (IE: end of builtin initcalls). But we don't need it anymore. It will be stopped when initcalls are finished. However we want two things: _Start this tracing only after pre-smp initcalls are finished. _Since we are planning to trace sched_switches at the same time, we want to enable them only during the initcall execution. For this purpose, this patch introduce two functions to enable/disable the sched_switch tracing during boot. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- init/main.c | 4 +++- kernel/trace/trace_boot.c | 28 ++++++++++++++++------------ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037c..4642959e5bda 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -234,6 +234,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ struct boot_trace { pid_t caller; char func[KSYM_NAME_LEN]; @@ -244,13 +249,28 @@ struct boot_trace { }; #ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ extern void start_boot_trace(void); -extern void stop_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); #else static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } -static inline void stop_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } #endif diff --git a/init/main.c b/init/main.c index 7e117a231af1..4b03cd5656ca 100644 --- a/init/main.c +++ b/init/main.c @@ -711,6 +711,7 @@ int do_one_initcall(initcall_t fn) it.caller = task_pid_nr(current); printk("calling %pF @ %i\n", fn, it.caller); it.calltime = ktime_get(); + enable_boot_trace(); } it.result = fn(); @@ -722,6 +723,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned %d after %Ld usecs\n", fn, it.result, it.duration); trace_boot(&it, fn); + disable_boot_trace(); } msgbuf[0] = 0; @@ -882,7 +884,7 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - stop_boot_trace(); + init_post(); return 0; } diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d0a5e50eeff2..d104d5b46413 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -13,23 +13,29 @@ #include "trace.h" static struct trace_array *boot_trace; -static int trace_boot_enabled; +static bool pre_initcalls_finished; - -/* Should be started after do_pre_smp_initcalls() in init/main.c */ +/* Tells the boot tracer that the pre_smp_initcalls are finished. + * So we are ready . + * It doesn't enable sched events tracing however. + * You have to call enable_boot_trace to do so. + */ void start_boot_trace(void) { - trace_boot_enabled = 1; + pre_initcalls_finished = true; } -void stop_boot_trace(void) +void enable_boot_trace(void) +{ +} + +void disable_boot_trace(void) { - trace_boot_enabled = 0; } void reset_boot_trace(struct trace_array *tr) { - stop_boot_trace(); + disable_boot_trace(); } static void boot_trace_init(struct trace_array *tr) @@ -37,8 +43,6 @@ static void boot_trace_init(struct trace_array *tr) int cpu; boot_trace = tr; - trace_boot_enabled = 0; - for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); } @@ -46,9 +50,9 @@ static void boot_trace_init(struct trace_array *tr) static void boot_trace_ctrl_update(struct trace_array *tr) { if (tr->ctrl) - start_boot_trace(); + enable_boot_trace(); else - stop_boot_trace(); + disable_boot_trace(); } static enum print_line_t initcall_print_line(struct trace_iterator *iter) @@ -99,7 +103,7 @@ void trace_boot(struct boot_trace *it, initcall_t fn) unsigned long irq_flags; struct trace_array *tr = boot_trace; - if (!trace_boot_enabled) + if (!pre_initcalls_finished) return; /* Get its name now since this function could From 07695fa04e8a3384b0c855398ce1f7885bd7dc3b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:08:28 +0100 Subject: [PATCH 2/6] tracing/ftrace: fix a race condition in sched_switch tracer Impact: fix race condition in sched_switch tracer This patch fixes a race condition in the sched_switch tracer. If several tasks (IE: concurrent initcalls) are playing with tracing_start_cmdline_record() and tracing_stop_cmdline_record(), the following situation could happen: _ Task A and B are using the same tracepoint probe. Task A holds it. Task B is sleeping and doesn't hold it. _ Task A frees the sched tracer, then sched_ref is decremented to 0. _ Task A is preempted and hadn't yet unregistered its tracepoint probe, then B runs. _ B increments sched_ref, sees it's 1 and then guess it has to register its probe. But it has not been freed by task A. _ A lot of bad things can happen after that... Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace_sched_switch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b8f56beb1a62..59de5141207c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -17,6 +17,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; static atomic_t sched_ref; +static DEFINE_MUTEX(tracepoint_mutex); static void probe_sched_switch(struct rq *__rq, struct task_struct *prev, @@ -125,18 +126,22 @@ static void tracing_start_sched_switch(void) { long ref; + mutex_lock(&tracepoint_mutex); ref = atomic_inc_return(&sched_ref); if (ref == 1) tracing_sched_register(); + mutex_unlock(&tracepoint_mutex); } static void tracing_stop_sched_switch(void) { long ref; + mutex_lock(&tracepoint_mutex); ref = atomic_dec_and_test(&sched_ref); if (ref) tracing_sched_unregister(); + mutex_unlock(&tracepoint_mutex); } void tracing_start_cmdline_record(void) From e55f605c14679c30be41473e60b7ad26524cdc35 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:14:28 +0100 Subject: [PATCH 3/6] tracing/ftrace: remove unused code in sched_switch tracer Impact: cleanup When init_sched_switch_trace() is called, it has no reason to start the sched tracer if the sched_ref is not zero. _ If this is non-zero, the tracer is already used, but we can register it to the tracing engine. There is already a security which avoid the tracer probes not to be resgistered twice. _ If this is zero, this block will not be used. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace_sched_switch.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 59de5141207c..96620c714300 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -203,14 +203,6 @@ static struct tracer sched_switch_trace __read_mostly = __init static int init_sched_switch_trace(void) { - int ret = 0; - - if (atomic_read(&sched_ref)) - ret = tracing_sched_register(); - if (ret) { - pr_info("error registering scheduler trace\n"); - return ret; - } return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); From d7ad44b697c9d13e445ddc7d16f736fbac333249 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:20:08 +0100 Subject: [PATCH 4/6] tracing/fastboot: use sched switch tracer from boot tracer Impact: enhance boot trace output with scheduling events Use the sched_switch tracer from the boot tracer. We also can trace schedule events inside the initcalls. Sched tracing is disabled after the initcall has finished and then reenabled before the next one is started. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ kernel/trace/trace.h | 1 + kernel/trace/trace_boot.c | 6 ++++++ kernel/trace/trace_sched_switch.c | 6 +++--- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4c40c868d67..50d7018163f6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3251,6 +3251,8 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); #ifdef CONFIG_BOOT_TRACER + /* We don't want to launch sched_switch tracer yet */ + global_trace.ctrl = 0; register_tracer(&boot_tracer); current_trace = &boot_tracer; current_trace->init(&global_trace); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8465ad052707..9911277b268b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -49,6 +49,7 @@ struct ftrace_entry { unsigned long parent_ip; }; extern struct tracer boot_tracer; +extern struct tracer sched_switch_trace; /* Used by the boot tracer */ /* * Context switch trace entry - which task (and prio) we switched from/to: diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d104d5b46413..6bbc8794a6df 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -27,10 +27,14 @@ void start_boot_trace(void) void enable_boot_trace(void) { + if (pre_initcalls_finished) + tracing_start_cmdline_record(); } void disable_boot_trace(void) { + if (pre_initcalls_finished) + tracing_stop_cmdline_record(); } void reset_boot_trace(struct trace_array *tr) @@ -45,6 +49,8 @@ static void boot_trace_init(struct trace_array *tr) for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); + + sched_switch_trace.init(tr); } static void boot_trace_ctrl_update(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 96620c714300..9d7bdac331dd 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -127,6 +127,7 @@ static void tracing_start_sched_switch(void) long ref; mutex_lock(&tracepoint_mutex); + tracer_enabled = 1; ref = atomic_inc_return(&sched_ref); if (ref == 1) tracing_sched_register(); @@ -138,6 +139,7 @@ static void tracing_stop_sched_switch(void) long ref; mutex_lock(&tracepoint_mutex); + tracer_enabled = 0; ref = atomic_dec_and_test(&sched_ref); if (ref) tracing_sched_unregister(); @@ -158,12 +160,10 @@ static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); tracing_start_cmdline_record(); - tracer_enabled = 1; } static void stop_sched_trace(struct trace_array *tr) { - tracer_enabled = 0; tracing_stop_cmdline_record(); } @@ -190,7 +190,7 @@ static void sched_switch_trace_ctrl_update(struct trace_array *tr) stop_sched_trace(tr); } -static struct tracer sched_switch_trace __read_mostly = +struct tracer sched_switch_trace __read_mostly = { .name = "sched_switch", .init = sched_switch_trace_init, From efade6e7821c4219818e9da08f9315dfa617048b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:28:58 +0100 Subject: [PATCH 5/6] tracing/ftrace: types and naming corrections for sched tracer Impact: cleanup This patch applies some corrections suggested by Steven Rostedt. Change the type of shed_ref into int since it is used into a Mutex, we don't need it anymore as an atomic variable in the sched_switch tracer. Also change the name of the register mutex. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace_sched_switch.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9d7bdac331dd..969953bf678f 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -16,8 +16,8 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; -static atomic_t sched_ref; -static DEFINE_MUTEX(tracepoint_mutex); +static int sched_ref; +static DEFINE_MUTEX(sched_register_mutex); static void probe_sched_switch(struct rq *__rq, struct task_struct *prev, @@ -28,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, int cpu; int pc; - if (!atomic_read(&sched_ref)) + if (!sched_ref) return; tracing_record_cmdline(prev); @@ -124,26 +124,22 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(void) { - long ref; - - mutex_lock(&tracepoint_mutex); - tracer_enabled = 1; - ref = atomic_inc_return(&sched_ref); - if (ref == 1) + mutex_lock(&sched_register_mutex); + if (!(sched_ref++)) { + tracer_enabled = 1; tracing_sched_register(); - mutex_unlock(&tracepoint_mutex); + } + mutex_unlock(&sched_register_mutex); } static void tracing_stop_sched_switch(void) { - long ref; - - mutex_lock(&tracepoint_mutex); - tracer_enabled = 0; - ref = atomic_dec_and_test(&sched_ref); - if (ref) + mutex_lock(&sched_register_mutex); + if (!(--sched_ref)) { tracing_sched_unregister(); - mutex_unlock(&tracepoint_mutex); + tracer_enabled = 0; + } + mutex_unlock(&sched_register_mutex); } void tracing_start_cmdline_record(void) From 79a9d461fd521f133f0e66485aa9ed09c21f5191 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:34:45 +0100 Subject: [PATCH 6/6] tracing/ftrace: fix a bug when switch current tracer to sched tracer Impact: fix boot tracer + sched tracer coupling bug Fix a bug that made the sched_switch tracer unable to run if set as the current_tracer after the boot tracer. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace_boot.c | 4 ++-- kernel/trace/trace_sched_switch.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 6bbc8794a6df..bd5046c9deb7 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -37,9 +37,9 @@ void disable_boot_trace(void) tracing_stop_cmdline_record(); } -void reset_boot_trace(struct trace_array *tr) +static void reset_boot_trace(struct trace_array *tr) { - disable_boot_trace(); + sched_switch_trace.reset(tr); } static void boot_trace_init(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 969953bf678f..888944d3409d 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -173,7 +173,7 @@ static void sched_switch_trace_init(struct trace_array *tr) static void sched_switch_trace_reset(struct trace_array *tr) { - if (tr->ctrl) + if (tr->ctrl && sched_ref) stop_sched_trace(tr); }