mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-12 16:54:42 +08:00
1ec6574a3c
ordinary user mode tasks. Commit 40966e316f
("kthread: Ensure struct kthread is present for all kthreads") caused init and the user mode helper threads that call kernel_execve to have struct kthread allocated for them. This struct kthread going away during execve in turn made a use after free of struct kthread possible. Commit 343f4c49f2
("kthread: Don't allocate kthread_struct for init and umh") is enough to fix the use after free and is simple enough to be backportable. The rest of the changes pass struct kernel_clone_args to clean things up and cause the code to make sense. In making init and the user mode helpers tasks purely user mode tasks I ran into two complications. The function task_tick_numa was detecting tasks without an mm by testing for the presence of PF_KTHREAD. The initramfs code in populate_initrd_image was using flush_delayed_fput to ensuere the closing of all it's file descriptors was complete, and flush_delayed_fput does not work in a userspace thread. I have looked and looked and more complications and in my code review I have not found any, and neither has anyone else with the code sitting in linux-next. Link: https://lkml.kernel.org/r/87mtfu4up3.fsf@email.froward.int.ebiederm.org Eric W. Biederman (8): kthread: Don't allocate kthread_struct for init and umh fork: Pass struct kernel_clone_args into copy_thread fork: Explicity test for idle tasks in copy_thread fork: Generalize PF_IO_WORKER handling init: Deal with the init process being a user mode process fork: Explicitly set PF_KTHREAD fork: Stop allowing kthreads to call execve sched: Update task_tick_numa to ignore tasks without an mm arch/alpha/kernel/process.c | 13 ++++++------ arch/arc/kernel/process.c | 13 ++++++------ arch/arm/kernel/process.c | 12 ++++++----- arch/arm64/kernel/process.c | 12 ++++++----- arch/csky/kernel/process.c | 15 ++++++------- arch/h8300/kernel/process.c | 10 ++++----- arch/hexagon/kernel/process.c | 12 ++++++----- arch/ia64/kernel/process.c | 15 +++++++------ arch/m68k/kernel/process.c | 12 ++++++----- arch/microblaze/kernel/process.c | 12 ++++++----- arch/mips/kernel/process.c | 13 ++++++------ arch/nios2/kernel/process.c | 12 ++++++----- arch/openrisc/kernel/process.c | 12 ++++++----- arch/parisc/kernel/process.c | 18 +++++++++------- arch/powerpc/kernel/process.c | 15 +++++++------ 
arch/riscv/kernel/process.c | 12 ++++++----- arch/s390/kernel/process.c | 12 ++++++----- arch/sh/kernel/process_32.c | 12 ++++++----- arch/sparc/kernel/process_32.c | 12 ++++++----- arch/sparc/kernel/process_64.c | 12 ++++++----- arch/um/kernel/process.c | 15 +++++++------ arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/include/asm/switch_to.h | 8 +++---- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/process.c | 18 +++++++++------- arch/xtensa/kernel/process.c | 17 ++++++++------- fs/exec.c | 8 ++++--- include/linux/sched/task.h | 8 +++++-- init/initramfs.c | 2 ++ init/main.c | 2 +- kernel/fork.c | 46 +++++++++++++++++++++++++++++++++------- kernel/sched/fair.c | 2 +- kernel/umh.c | 6 +++--- 33 files changed, 234 insertions(+), 160 deletions(-) Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmKaR/MACgkQC/v6Eiaj j0Aayg/7Bx66872d9c6igkJ+MPCTuh+v9QKCGwiYEmiU4Q5sVAFB0HPJO27qC14u 630X0RFNZTkPzNNEJNIW4kw6Dj8s8YRKf+FgQAVt4SzdRwT7eIPDjk1nGraopPJ3 O04pjvuTmUyidyViRyFcf2ptx/pnkrwP8jUSc+bGTgfASAKAgAokqKE5ecjewbBc Y/EAkQ6QW7KxPjeSmpAHwI+t3BpBev9WEC4PbhRhsBCQFO2+PJiklvqdhVNBnIjv qUezll/1xv9UYgniB15Q4Nb722SmnWSU3r8as1eFPugzTHizKhufrrpyP+KMK1A0 tdtEJNs5t2DZF7ZbGTFSPqJWmyTYLrghZdO+lOmnaSjHxK4Nda1d4NzbefJ0u+FE tutewowvHtBX6AFIbx+H3O+DOJM2IgNMf+ReQDU/TyNyVf3wBrTbsr9cLxypIJIp zze8npoLMlB7B4yxVo5ES5e63EXfi3iHl0L3/1EhoGwriRz1kWgVLUX/VZOUpscL RkJHsW6bT8sqxPWAA5kyWjEN+wNR2PxbXi8OE4arT0uJrEBMUgDCzydzOv5tJB00 mSQdytxH9LVdsmxBKAOBp5X6WOLGA4yb1cZ6E/mEhlqXMpBDF1DaMfwbWqxSYi4q sp5zU3SBAW0qceiZSsWZXInfbjrcQXNV/DkDRDO9OmzEZP4m1j0= =x6fy -----END PGP SIGNATURE----- Merge tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace Pull kthread updates from Eric Biederman: "This updates init and user mode helper tasks to be ordinary user mode tasks. Commit40966e316f
("kthread: Ensure struct kthread is present for all kthreads") caused init and the user mode helper threads that call kernel_execve to have struct kthread allocated for them. This struct kthread going away during execve in turn made a use after free of struct kthread possible. Here, commit 343f4c49f2
("kthread: Don't allocate kthread_struct for init and umh") is enough to fix the use after free and is simple enough to be backportable. The rest of the changes pass struct kernel_clone_args to clean things up and cause the code to make sense. In making init and the user mode helper tasks purely user mode tasks I ran into two complications. The function task_tick_numa was detecting tasks without an mm by testing for the presence of PF_KTHREAD. The initramfs code in populate_initrd_image was using flush_delayed_fput to ensure the closing of all its file descriptors was complete, and flush_delayed_fput does not work in a userspace thread. I have looked and looked for more complications and in my code review I have not found any, and neither has anyone else with the code sitting in linux-next" * tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: sched: Update task_tick_numa to ignore tasks without an mm fork: Stop allowing kthreads to call execve fork: Explicitly set PF_KTHREAD init: Deal with the init process being a user mode process fork: Generalize PF_IO_WORKER handling fork: Explicity test for idle tasks in copy_thread fork: Pass struct kernel_clone_args into copy_thread kthread: Don't allocate kthread_struct for init and umh
302 lines
7.9 KiB
C
302 lines
7.9 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* PARISC Architecture-dependent parts of process handling
|
|
* based on the work for i386
|
|
*
|
|
* Copyright (C) 1999-2003 Matthew Wilcox <willy at parisc-linux.org>
|
|
* Copyright (C) 2000 Martin K Petersen <mkp at mkp.net>
|
|
* Copyright (C) 2000 John Marvin <jsm at parisc-linux.org>
|
|
* Copyright (C) 2000 David Huggins-Daines <dhd with pobox.org>
|
|
* Copyright (C) 2000-2003 Paul Bame <bame at parisc-linux.org>
|
|
* Copyright (C) 2000 Philipp Rumpf <prumpf with tux.org>
|
|
* Copyright (C) 2000 David Kennedy <dkennedy with linuxcare.com>
|
|
* Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
|
|
* Copyright (C) 2000 Grant Grundler <grundler with parisc-linux.org>
|
|
* Copyright (C) 2001 Alan Modra <amodra at parisc-linux.org>
|
|
* Copyright (C) 2001-2002 Ryan Bradetich <rbrad at parisc-linux.org>
|
|
* Copyright (C) 2001-2014 Helge Deller <deller@gmx.de>
|
|
* Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
|
|
*/
|
|
#include <linux/elf.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/module.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/reboot.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/debug.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/random.h>
|
|
#include <linux/nmi.h>
|
|
#include <linux/sched/hotplug.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/assembly.h>
|
|
#include <asm/pdc.h>
|
|
#include <asm/pdc_chassis.h>
|
|
#include <asm/unwind.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/cacheflush.h>
|
|
|
|
#define COMMAND_GLOBAL F_EXTEND(0xfffe0030)
|
|
#define CMD_RESET 5 /* reset any module */
|
|
|
|
/*
|
|
** The Wright Brothers and Gecko systems have a H/W problem
|
|
** (Lasi...'nuf said) may cause a broadcast reset to lockup
|
|
** the system. An HVERSION dependent PDC call was developed
|
|
** to perform a "safe", platform specific broadcast reset instead
|
|
** of kludging up all the code.
|
|
**
|
|
** Older machines which do not implement PDC_BROADCAST_RESET will
|
|
** return (with an error) and the regular broadcast reset can be
|
|
** issued. Obviously, if the PDC does implement PDC_BROADCAST_RESET
|
|
** the PDC call will not return (the system will be reset).
|
|
*/
|
|
void machine_restart(char *cmd)
|
|
{
|
|
#ifdef FASTBOOT_SELFTEST_SUPPORT
|
|
/*
|
|
** If user has modified the Firmware Selftest Bitmap,
|
|
** run the tests specified in the bitmap after the
|
|
** system is rebooted w/PDC_DO_RESET.
|
|
**
|
|
** ftc_bitmap = 0x1AUL "Skip destructive memory tests"
|
|
**
|
|
** Using "directed resets" at each processor with the MEM_TOC
|
|
** vector cleared will also avoid running destructive
|
|
** memory self tests. (Not implemented yet)
|
|
*/
|
|
if (ftc_bitmap) {
|
|
pdc_do_firm_test_reset(ftc_bitmap);
|
|
}
|
|
#endif
|
|
/* set up a new led state on systems shipped with a LED State panel */
|
|
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_SHUTDOWN);
|
|
|
|
/* "Normal" system reset */
|
|
pdc_do_reset();
|
|
|
|
/* Nope...box should reset with just CMD_RESET now */
|
|
gsc_writel(CMD_RESET, COMMAND_GLOBAL);
|
|
|
|
/* Wait for RESET to lay us to rest. */
|
|
while (1) ;
|
|
|
|
}
|
|
|
|
/*
 * Optional power-off handler: machine_power_off() calls it first when it
 * is non-NULL.  Where it gets assigned is not visible here.
 */
void (*chassis_power_off)(void);
|
|
|
|
/*
|
|
* This routine is called from sys_reboot to actually turn off the
|
|
* machine
|
|
*/
|
|
void machine_power_off(void)
|
|
{
|
|
/* If there is a registered power off handler, call it. */
|
|
if (chassis_power_off)
|
|
chassis_power_off();
|
|
|
|
/* Put the soft power button back under hardware control.
|
|
* If the user had already pressed the power button, the
|
|
* following call will immediately power off. */
|
|
pdc_soft_power_button(0);
|
|
|
|
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_SHUTDOWN);
|
|
|
|
/* ipmi_poweroff may have been installed. */
|
|
do_kernel_power_off();
|
|
|
|
/* It seems we have no way to power the system off via
|
|
* software. The user has to press the button himself. */
|
|
|
|
printk(KERN_EMERG "System shut down completed.\n"
|
|
"Please power this system off now.");
|
|
|
|
/* prevent soft lockup/stalled CPU messages for endless loop. */
|
|
rcu_sysrq_start();
|
|
lockup_detector_soft_poweroff();
|
|
for (;;);
|
|
}
|
|
|
|
/*
 * Power-off hook, exported so that other kernel code can reference it.
 * Nothing in the visible part of this file assigns or calls it.
 */
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
|
|
|
|
/*
 * Halt is not handled separately here: the request is passed straight
 * on to machine_power_off().
 */
void machine_halt(void)
{
	machine_power_off();
}
|
|
|
|
void flush_thread(void)
{
	/*
	 * Intentionally empty.  Only fpu stuff or perf monitors would
	 * need to be handled here.
	 * REVISIT: several arches implement a "lazy fpu state".
	 */
}
|
|
|
|
/* Intentionally empty: nothing is done with dead_task here. */
void release_thread(struct task_struct *dead_task)
{
}
|
|
|
|
/*
|
|
* Idle thread support
|
|
*
|
|
* Detect when running on QEMU with SeaBIOS PDC Firmware and let
|
|
* QEMU idle the host too.
|
|
*/
|
|
|
|
/*
 * Flag tested by parisc_idle_init(): while it is zero, idle polling is
 * switched on through cpu_idle_poll_ctrl().  It is not written anywhere
 * in the visible part of this file.
 */
int running_on_qemu __ro_after_init;
EXPORT_SYMBOL(running_on_qemu);
|
|
|
|
/*
|
|
* Called from the idle thread for the CPU which has been shutdown.
|
|
*/
|
|
void arch_cpu_idle_dead(void)
{
#ifdef CONFIG_HOTPLUG_CPU
	idle_task_exit();

	local_irq_disable();

	/* Let __cpu_die() know that this CPU is now safe to dispose of. */
	(void)cpu_report_death();

	/* Make sure the cache lines are written out before going away. */
	flush_cache_all_local();
	flush_tlb_all_local(NULL);

	/* Hand the CPU over to the PDC firmware idle loop. */
	__pdc_cpu_rendezvous();

	/* Only reached if the rendezvous call came back. */
	pr_warn("PDC does not provide rendezvous function.\n");
#endif
	/* Park this CPU forever. */
	for (;;)
		;
}
|
|
|
|
void __cpuidle arch_cpu_idle(void)
{
	raw_local_irq_enable();

	/*
	 * "or %r10,%r10,%r10" is a nop on real hardware, whereas qemu
	 * will idle sleep when it sees it.
	 */
	asm volatile("or %%r10,%%r10,%%r10\n" : : : );
}
|
|
|
|
/*
 * Initcall: turn on idle polling unless running_on_qemu is set.
 * Always returns 0.
 */
static int __init parisc_idle_init(void)
{
	/* Under qemu, leave the idle behaviour untouched. */
	if (running_on_qemu)
		return 0;

	cpu_idle_poll_ctrl(1);
	return 0;
}
arch_initcall(parisc_idle_init);
|
|
|
|
/*
|
|
* Copy architecture-specific thread state
|
|
*/
|
|
int
|
|
copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
|
{
|
|
unsigned long clone_flags = args->flags;
|
|
unsigned long usp = args->stack;
|
|
unsigned long tls = args->tls;
|
|
struct pt_regs *cregs = &(p->thread.regs);
|
|
void *stack = task_stack_page(p);
|
|
|
|
/* We have to use void * instead of a function pointer, because
|
|
* function pointers aren't a pointer to the function on 64-bit.
|
|
* Make them const so the compiler knows they live in .text */
|
|
extern void * const ret_from_kernel_thread;
|
|
extern void * const child_return;
|
|
|
|
if (unlikely(args->fn)) {
|
|
/* kernel thread */
|
|
memset(cregs, 0, sizeof(struct pt_regs));
|
|
if (args->idle) /* idle thread */
|
|
return 0;
|
|
/* Must exit via ret_from_kernel_thread in order
|
|
* to call schedule_tail()
|
|
*/
|
|
cregs->ksp = (unsigned long) stack + FRAME_SIZE + PT_SZ_ALGN;
|
|
cregs->kpc = (unsigned long) &ret_from_kernel_thread;
|
|
/*
|
|
* Copy function and argument to be called from
|
|
* ret_from_kernel_thread.
|
|
*/
|
|
#ifdef CONFIG_64BIT
|
|
cregs->gr[27] = ((unsigned long *)args->fn)[3];
|
|
cregs->gr[26] = ((unsigned long *)args->fn)[2];
|
|
#else
|
|
cregs->gr[26] = (unsigned long) args->fn;
|
|
#endif
|
|
cregs->gr[25] = (unsigned long) args->fn_arg;
|
|
} else {
|
|
/* user thread */
|
|
/* usp must be word aligned. This also prevents users from
|
|
* passing in the value 1 (which is the signal for a special
|
|
* return for a kernel thread) */
|
|
if (usp) {
|
|
usp = ALIGN(usp, 4);
|
|
if (likely(usp))
|
|
cregs->gr[30] = usp;
|
|
}
|
|
cregs->ksp = (unsigned long) stack + FRAME_SIZE;
|
|
cregs->kpc = (unsigned long) &child_return;
|
|
|
|
/* Setup thread TLS area */
|
|
if (clone_flags & CLONE_SETTLS)
|
|
cregs->cr27 = tls;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
unsigned long
|
|
__get_wchan(struct task_struct *p)
|
|
{
|
|
struct unwind_frame_info info;
|
|
unsigned long ip;
|
|
int count = 0;
|
|
|
|
/*
|
|
* These bracket the sleeping functions..
|
|
*/
|
|
|
|
unwind_frame_init_from_blocked_task(&info, p);
|
|
do {
|
|
if (unwind_once(&info) < 0)
|
|
return 0;
|
|
if (task_is_running(p))
|
|
return 0;
|
|
ip = info.ip;
|
|
if (!in_sched_functions(ip))
|
|
return ip;
|
|
} while (count++ < MAX_UNWIND_ENTRIES);
|
|
return 0;
|
|
}
|
|
|
|
static inline unsigned long brk_rnd(void)
|
|
{
|
|
return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
|
|
}
|
|
|
|
unsigned long arch_randomize_brk(struct mm_struct *mm)
|
|
{
|
|
unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
|
|
|
|
if (ret < mm->brk)
|
|
return mm->brk;
|
|
return ret;
|
|
}
|