From 1029a2b52c09e479fd7b07275812ad97868c0fb0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Sep 2014 10:18:49 +0200 Subject: [PATCH] sched, exit: Deal with nested sleeps do_wait() is a big wait loop, but we set TASK_RUNNING too late; we end up calling potential sleeps before we reset it. Not strictly a bug since we're guaranteed to exit the loop and not call schedule(); put in annotations to quiet might_sleep(). WARNING: CPU: 0 PID: 1 at ../kernel/sched/core.c:7123 __might_sleep+0x7e/0x90() do not call blocking ops when !TASK_RUNNING; state=1 set at [] do_wait+0x88/0x270 Call Trace: [] dump_stack+0x4e/0x7a [] warn_slowpath_common+0x8c/0xc0 [] warn_slowpath_fmt+0x4c/0x50 [] __might_sleep+0x7e/0x90 [] might_fault+0x55/0xb0 [] wait_consider_task+0x90b/0xc10 [] do_wait+0x104/0x270 [] SyS_wait4+0x77/0x100 [] system_call_fastpath+0x16/0x1b Signed-off-by: Peter Zijlstra (Intel) Cc: tglx@linutronix.de Cc: umgwanakikbuti@gmail.com Cc: ilya.dryomov@inktank.com Cc: Alex Elder Cc: Andrew Morton Cc: Axel Lin Cc: Daniel Borkmann Cc: Dave Jones Cc: Guillaume Morin Cc: Ionut Alexa Cc: Jason Baron Cc: Linus Torvalds Cc: Michal Hocko Cc: Michal Schmidt Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Rik van Riel Cc: Rusty Russell Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140924082242.186408915@infradead.org Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 ++ kernel/exit.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3d770f5564b8..5068a0d9fecd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -175,10 +175,12 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) +# define sched_annotate_sleep() __set_current_state(TASK_RUNNING) #else static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define sched_annotate_sleep() do { } while (0) #endif #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) diff --git a/kernel/exit.c b/kernel/exit.c index 5d30019ff953..232c4bc8bcc9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -997,6 +997,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) get_task_struct(p); read_unlock(&tasklist_lock); + sched_annotate_sleep(); + if ((exit_code & 0x7f) == 0) { why = CLD_EXITED; status = exit_code >> 8; @@ -1079,6 +1081,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * thread can reap it because we its state == DEAD/TRACE. */ read_unlock(&tasklist_lock); + sched_annotate_sleep(); retval = wo->wo_rusage ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; @@ -1210,6 +1213,7 @@ unlock_sig: pid = task_pid_vnr(p); why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); + sched_annotate_sleep(); if (unlikely(wo->wo_flags & WNOWAIT)) return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); @@ -1272,6 +1276,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) pid = task_pid_vnr(p); get_task_struct(p); read_unlock(&tasklist_lock); + sched_annotate_sleep(); if (!wo->wo_info) { retval = wo->wo_rusage