From 05ea0424f0e21c0ef9b47c89826e7c22ae137975 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Nov 2021 09:33:00 -0600 Subject: [PATCH] exit: Move oops specific logic from do_exit into make_task_dead The beginning of do_exit has become cluttered and difficult to read as it is filled with checks to handle things that can only happen when the kernel is operating improperly. Now that we have a dedicated function for cleaning up a task when the kernel is operating improperly move the checks there. Signed-off-by: "Eric W. Biederman" --- kernel/exit.c | 78 ++++++++++++++++++++++----------------------- kernel/futex/core.c | 2 +- kernel/kexec_core.c | 2 +- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index bfa513c5b227..d0ec6f6b41cb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -735,36 +735,8 @@ void __noreturn do_exit(long code) struct task_struct *tsk = current; int group_dead; - /* - * We can get here from a kernel oops, sometimes with preemption off. - * Start by checking for critical errors. - * Then fix up important state like USER_DS and preemption. - * Then do everything else. - */ - WARN_ON(blk_needs_flush_plug(tsk)); - if (unlikely(in_interrupt())) - panic("Aiee, killing interrupt handler!"); - if (unlikely(!tsk->pid)) - panic("Attempted to kill the idle task!"); - - /* - * If do_exit is called because this processes oopsed, it's possible - * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before - * continuing. Amongst other possible reasons, this is to prevent - * mm_release()->clear_child_tid() from writing to a user-controlled - * kernel address. - */ - force_uaccess_begin(); - - if (unlikely(in_atomic())) { - pr_info("note: %s[%d] exited with preempt_count %d\n", - current->comm, task_pid_nr(current), - preempt_count()); - preempt_count_set(PREEMPT_ENABLED); - } - profile_task_exit(tsk); kcov_task_exit(tsk); @@ -773,17 +745,6 @@ void __noreturn do_exit(long code) validate_creds_for_do_exit(tsk); - /* - * We're taking recursive faults here in do_exit. Safest is to just - * leave this task alone and wait for reboot. - */ - if (unlikely(tsk->flags & PF_EXITING)) { - pr_alert("Fixing recursive fault but reboot is needed!\n"); - futex_exit_recursive(tsk); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -889,7 +850,46 @@ void __noreturn make_task_dead(int signr) /* * Take the task off the cpu after something catastrophic has * happened. + * + * We can get here from a kernel oops, sometimes with preemption off. + * Start by checking for critical errors. + * Then fix up important state like USER_DS and preemption. + * Then do everything else. */ + struct task_struct *tsk = current; + + if (unlikely(in_interrupt())) + panic("Aiee, killing interrupt handler!"); + if (unlikely(!tsk->pid)) + panic("Attempted to kill the idle task!"); + + /* + * If make_task_dead is called because this processes oopsed, it's possible + * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before + * continuing. Amongst other possible reasons, this is to prevent + * mm_release()->clear_child_tid() from writing to a user-controlled + * kernel address. + */ + force_uaccess_begin(); + + if (unlikely(in_atomic())) { + pr_info("note: %s[%d] exited with preempt_count %d\n", + current->comm, task_pid_nr(current), + preempt_count()); + preempt_count_set(PREEMPT_ENABLED); + } + + /* + * We're taking recursive faults here in make_task_dead. Safest is to just + * leave this task alone and wait for reboot. + */ + if (unlikely(tsk->flags & PF_EXITING)) { + pr_alert("Fixing recursive fault but reboot is needed!\n"); + futex_exit_recursive(tsk); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + } + do_exit(signr); } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 25d8a88b32e5..39a1522865b5 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1044,7 +1044,7 @@ static void futex_cleanup(struct task_struct *tsk) * actually finished the futex cleanup. The worst case for this is that the * waiter runs through the wait loop until the state becomes visible. * - * This is called from the recursive fault handling path in do_exit(). + * This is called from the recursive fault handling path in make_task_dead(). * * This is best effort. Either the futex exit code has run already or * not. If the OWNER_DIED bit has been set on the futex then the waiter can diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5a5d192a89ac..68480f731192 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -81,7 +81,7 @@ int kexec_should_crash(struct task_struct *p) if (crash_kexec_post_notifiers) return 0; /* - * There are 4 panic() calls in do_exit() path, each of which + * There are 4 panic() calls in make_task_dead() path, each of which * corresponds to each of these 4 conditions. */ if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)