2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 06:20:36 +08:00
|
|
|
/* binfmt_elf_fdpic.c: FDPIC ELF binary format
|
|
|
|
*
|
2006-07-10 19:44:53 +08:00
|
|
|
* Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
|
2005-04-17 06:20:36 +08:00
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
* Derived from binfmt_elf.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/sched.h>
|
2017-02-09 01:51:37 +08:00
|
|
|
#include <linux/sched/coredump.h>
|
|
|
|
#include <linux/sched/task_stack.h>
|
2017-02-05 18:48:36 +08:00
|
|
|
#include <linux/sched/cputime.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/mman.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/signal.h>
|
|
|
|
#include <linux/binfmts.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/fcntl.h>
|
|
|
|
#include <linux/slab.h>
|
2006-07-10 19:44:55 +08:00
|
|
|
#include <linux/pagemap.h>
|
2008-10-16 13:04:16 +08:00
|
|
|
#include <linux/security.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/highmem.h>
|
2006-07-10 19:44:55 +08:00
|
|
|
#include <linux/highuid.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/personality.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/elf.h>
|
|
|
|
#include <linux/elf-fdpic.h>
|
|
|
|
#include <linux/elfcore.h>
|
2010-03-06 05:44:06 +08:00
|
|
|
#include <linux/coredump.h>
|
2015-10-06 06:33:37 +08:00
|
|
|
#include <linux/dax.h>
|
2020-06-15 23:22:55 +08:00
|
|
|
#include <linux/regset.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-12-25 03:46:01 +08:00
|
|
|
#include <linux/uaccess.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/param.h>
|
|
|
|
|
|
|
|
typedef char *elf_caddr_t;
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
#define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
|
|
|
|
#else
|
|
|
|
#define kdebug(fmt, ...) do {} while(0)
|
|
|
|
#endif
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
#if 0
|
|
|
|
#define kdcore(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
|
|
|
|
#else
|
|
|
|
#define kdcore(fmt, ...) do {} while(0)
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
MODULE_LICENSE("GPL");
|
|
|
|
|
2012-10-21 10:00:48 +08:00
|
|
|
static int load_elf_fdpic_binary(struct linux_binprm *);
|
2006-07-10 19:44:53 +08:00
|
|
|
static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
|
|
|
|
static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
|
|
|
|
struct mm_struct *, const char *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *,
|
|
|
|
struct elf_fdpic_params *,
|
|
|
|
struct elf_fdpic_params *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifndef CONFIG_MMU
|
2006-07-10 19:44:53 +08:00
|
|
|
static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
|
|
|
|
struct file *,
|
|
|
|
struct mm_struct *);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
|
|
|
|
struct file *, struct mm_struct *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-12-16 08:47:37 +08:00
|
|
|
#ifdef CONFIG_ELF_CORE
|
2009-12-18 07:27:16 +08:00
|
|
|
static int elf_fdpic_core_dump(struct coredump_params *cprm);
|
2006-07-10 19:44:55 +08:00
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static struct linux_binfmt elf_fdpic_format = {
|
|
|
|
.module = THIS_MODULE,
|
|
|
|
.load_binary = load_elf_fdpic_binary,
|
2009-12-16 08:47:37 +08:00
|
|
|
#ifdef CONFIG_ELF_CORE
|
2006-07-10 19:44:55 +08:00
|
|
|
.core_dump = elf_fdpic_core_dump,
|
2005-04-17 06:20:36 +08:00
|
|
|
.min_coredump = ELF_EXEC_PAGESIZE,
|
2022-02-14 03:25:20 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
static int __init init_elf_fdpic_binfmt(void)
|
|
|
|
{
|
2012-03-17 15:05:16 +08:00
|
|
|
register_binfmt(&elf_fdpic_format);
|
|
|
|
return 0;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
static void __exit exit_elf_fdpic_binfmt(void)
|
|
|
|
{
|
|
|
|
unregister_binfmt(&elf_fdpic_format);
|
|
|
|
}
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
core_initcall(init_elf_fdpic_binfmt);
|
2006-07-10 19:44:53 +08:00
|
|
|
module_exit(exit_elf_fdpic_binfmt);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
static int is_elf(struct elfhdr *hdr, struct file *file)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0)
|
|
|
|
return 0;
|
|
|
|
if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)
|
|
|
|
return 0;
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (!elf_check_arch(hdr))
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
2013-09-23 04:27:52 +08:00
|
|
|
if (!file->f_op->mmap)
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
#ifndef elf_check_fdpic
|
|
|
|
#define elf_check_fdpic(x) 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef elf_check_const_displacement
|
|
|
|
#define elf_check_const_displacement(x) 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static int is_constdisp(struct elfhdr *hdr)
|
|
|
|
{
|
|
|
|
if (!elf_check_fdpic(hdr))
|
|
|
|
return 1;
|
|
|
|
if (elf_check_const_displacement(hdr))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* read the program headers table into memory
|
|
|
|
*/
|
2006-07-10 19:44:53 +08:00
|
|
|
static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
|
|
|
|
struct file *file)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_phdr *phdr;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long size;
|
|
|
|
int retval, loop;
|
2017-09-01 23:39:13 +08:00
|
|
|
loff_t pos = params->hdr.e_phoff;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (params->hdr.e_phentsize != sizeof(struct elf_phdr))
|
|
|
|
return -ENOMEM;
|
|
|
|
if (params->hdr.e_phnum > 65536U / sizeof(struct elf_phdr))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
size = params->hdr.e_phnum * sizeof(struct elf_phdr);
|
|
|
|
params->phdrs = kmalloc(size, GFP_KERNEL);
|
|
|
|
if (!params->phdrs)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2017-09-01 23:39:13 +08:00
|
|
|
retval = kernel_read(file, params->phdrs, size, &pos);
|
2008-04-29 15:59:34 +08:00
|
|
|
if (unlikely(retval != size))
|
|
|
|
return retval < 0 ? retval : -ENOEXEC;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* determine stack size for this binary */
|
|
|
|
phdr = params->phdrs;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
if (phdr->p_type != PT_GNU_STACK)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (phdr->p_flags & PF_X)
|
|
|
|
params->flags |= ELF_FDPIC_FLAG_EXEC_STACK;
|
|
|
|
else
|
|
|
|
params->flags |= ELF_FDPIC_FLAG_NOEXEC_STACK;
|
|
|
|
|
|
|
|
params->stack_size = phdr->p_memsz;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* load an fdpic binary into various bits of memory
|
|
|
|
*/
|
2012-10-21 10:00:48 +08:00
|
|
|
static int load_elf_fdpic_binary(struct linux_binprm *bprm)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct elf_fdpic_params exec_params, interp_params;
|
2012-10-21 10:00:48 +08:00
|
|
|
struct pt_regs *regs = current_pt_regs();
|
2005-04-17 06:20:36 +08:00
|
|
|
struct elf_phdr *phdr;
|
2006-07-10 19:44:53 +08:00
|
|
|
unsigned long stack_size, entryaddr;
|
|
|
|
#ifdef ELF_FDPIC_PLAT_INIT
|
|
|
|
unsigned long dynaddr;
|
2010-01-07 01:23:17 +08:00
|
|
|
#endif
|
|
|
|
#ifndef CONFIG_MMU
|
|
|
|
unsigned long stack_prot;
|
2006-07-10 19:44:53 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
struct file *interpreter = NULL; /* to shut gcc up */
|
|
|
|
char *interpreter_name = NULL;
|
|
|
|
int executable_stack;
|
|
|
|
int retval, i;
|
2017-09-01 23:39:13 +08:00
|
|
|
loff_t pos;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-03-23 15:10:00 +08:00
|
|
|
kdebug("____ LOAD %d ____", current->pid);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
memset(&exec_params, 0, sizeof(exec_params));
|
|
|
|
memset(&interp_params, 0, sizeof(interp_params));
|
|
|
|
|
|
|
|
exec_params.hdr = *(struct elfhdr *) bprm->buf;
|
|
|
|
exec_params.flags = ELF_FDPIC_FLAG_PRESENT | ELF_FDPIC_FLAG_EXECUTABLE;
|
|
|
|
|
|
|
|
/* check that this is a binary we know how to deal with */
|
|
|
|
retval = -ENOEXEC;
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (!is_elf(&exec_params.hdr, bprm->file))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto error;
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (!elf_check_fdpic(&exec_params.hdr)) {
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
/* binfmt_elf handles non-fdpic elf except on nommu */
|
|
|
|
goto error;
|
|
|
|
#else
|
|
|
|
/* nommu can only load ET_DYN (PIE) ELF */
|
|
|
|
if (exec_params.hdr.e_type != ET_DYN)
|
|
|
|
goto error;
|
|
|
|
#endif
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* read the program header table */
|
|
|
|
retval = elf_fdpic_fetch_phdrs(&exec_params, bprm->file);
|
|
|
|
if (retval < 0)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/* scan for a program header that specifies an interpreter */
|
|
|
|
phdr = exec_params.phdrs;
|
|
|
|
|
|
|
|
for (i = 0; i < exec_params.hdr.e_phnum; i++, phdr++) {
|
|
|
|
switch (phdr->p_type) {
|
|
|
|
case PT_INTERP:
|
|
|
|
retval = -ENOMEM;
|
|
|
|
if (phdr->p_filesz > PATH_MAX)
|
|
|
|
goto error;
|
|
|
|
retval = -ENOENT;
|
|
|
|
if (phdr->p_filesz < 2)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/* read the name of the interpreter into memory */
|
2006-01-10 12:54:45 +08:00
|
|
|
interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!interpreter_name)
|
|
|
|
goto error;
|
|
|
|
|
2017-09-01 23:39:13 +08:00
|
|
|
pos = phdr->p_offset;
|
|
|
|
retval = kernel_read(bprm->file, interpreter_name,
|
|
|
|
phdr->p_filesz, &pos);
|
2008-04-29 15:59:34 +08:00
|
|
|
if (unlikely(retval != phdr->p_filesz)) {
|
|
|
|
if (retval >= 0)
|
|
|
|
retval = -ENOEXEC;
|
2005-04-17 06:20:36 +08:00
|
|
|
goto error;
|
2008-04-29 15:59:34 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
retval = -ENOENT;
|
|
|
|
if (interpreter_name[phdr->p_filesz - 1] != '\0')
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
kdebug("Using ELF interpreter %s", interpreter_name);
|
|
|
|
|
|
|
|
/* replace the program with the interpreter */
|
|
|
|
interpreter = open_exec(interpreter_name);
|
|
|
|
retval = PTR_ERR(interpreter);
|
|
|
|
if (IS_ERR(interpreter)) {
|
|
|
|
interpreter = NULL;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2007-01-26 16:57:16 +08:00
|
|
|
/*
|
|
|
|
* If the binary is not readable then enforce
|
|
|
|
* mm->dumpable = 0 regardless of the interpreter's
|
|
|
|
* permissions.
|
|
|
|
*/
|
2011-06-20 00:49:47 +08:00
|
|
|
would_dump(bprm, interpreter);
|
2007-01-26 16:57:16 +08:00
|
|
|
|
2017-09-01 23:39:13 +08:00
|
|
|
pos = 0;
|
|
|
|
retval = kernel_read(interpreter, bprm->buf,
|
|
|
|
BINPRM_BUF_SIZE, &pos);
|
2008-04-29 15:59:34 +08:00
|
|
|
if (unlikely(retval != BINPRM_BUF_SIZE)) {
|
|
|
|
if (retval >= 0)
|
|
|
|
retval = -ENOEXEC;
|
2005-04-17 06:20:36 +08:00
|
|
|
goto error;
|
2008-04-29 15:59:34 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
interp_params.hdr = *((struct elfhdr *) bprm->buf);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PT_LOAD:
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
if (exec_params.load_addr == 0)
|
|
|
|
exec_params.load_addr = phdr->p_vaddr;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (is_constdisp(&exec_params.hdr))
|
2005-04-17 06:20:36 +08:00
|
|
|
exec_params.flags |= ELF_FDPIC_FLAG_CONSTDISP;
|
|
|
|
|
|
|
|
/* perform insanity checks on the interpreter */
|
|
|
|
if (interpreter_name) {
|
|
|
|
retval = -ELIBBAD;
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (!is_elf(&interp_params.hdr, interpreter))
|
2005-04-17 06:20:36 +08:00
|
|
|
goto error;
|
|
|
|
|
|
|
|
interp_params.flags = ELF_FDPIC_FLAG_PRESENT;
|
|
|
|
|
|
|
|
/* read the interpreter's program header table */
|
|
|
|
retval = elf_fdpic_fetch_phdrs(&interp_params, interpreter);
|
|
|
|
if (retval < 0)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
stack_size = exec_params.stack_size;
|
|
|
|
if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
|
|
|
|
executable_stack = EXSTACK_ENABLE_X;
|
|
|
|
else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
|
|
|
|
executable_stack = EXSTACK_DISABLE_X;
|
|
|
|
else
|
|
|
|
executable_stack = EXSTACK_DEFAULT;
|
|
|
|
|
2024-01-18 23:06:37 +08:00
|
|
|
if (stack_size == 0 && interp_params.flags & ELF_FDPIC_FLAG_PRESENT) {
|
2009-09-24 06:57:06 +08:00
|
|
|
stack_size = interp_params.stack_size;
|
|
|
|
if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
|
|
|
|
executable_stack = EXSTACK_ENABLE_X;
|
|
|
|
else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
|
|
|
|
executable_stack = EXSTACK_DISABLE_X;
|
|
|
|
else
|
|
|
|
executable_stack = EXSTACK_DEFAULT;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
retval = -ENOEXEC;
|
|
|
|
if (stack_size == 0)
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
stack_size = 131072UL; /* same as exec.c's default commit */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (is_constdisp(&interp_params.hdr))
|
2005-04-17 06:20:36 +08:00
|
|
|
interp_params.flags |= ELF_FDPIC_FLAG_CONSTDISP;
|
|
|
|
|
|
|
|
/* flush all traces of the currently running executable */
|
2020-05-03 20:54:10 +08:00
|
|
|
retval = begin_new_exec(bprm);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (retval)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/* there's now no turning back... the old userspace image is dead,
|
2014-05-05 08:11:36 +08:00
|
|
|
* defunct, deceased, etc.
|
|
|
|
*/
|
2023-09-07 09:18:08 +08:00
|
|
|
SET_PERSONALITY(exec_params.hdr);
|
fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries
The ELF binary loader in binfmt_elf.c requires an MMU, making it
impossible to use regular ELF binaries on NOMMU archs. However, the FDPIC
ELF loader in binfmt_elf_fdpic.c is fully capable as a loader for plain
ELF, which requires constant displacements between LOAD segments, since it
already supports FDPIC ELF files flagged as needing constant displacement.
This patch adjusts the FDPIC ELF loader to accept non-FDPIC ELF files on
NOMMU archs. They are treated identically to FDPIC ELF files with the
constant-displacement flag bit set, except for personality, which must
match the ABI of the program being loaded; the PER_LINUX_FDPIC personality
controls how the kernel interprets function pointers passed to sigaction.
Files that do not set a stack size requirement explicitly are given a
default stack size (matching the amount of committed stack the normal ELF
loader for MMU archs would give them) rather than being rejected; this is
necessary because plain ELF files generally do not declare stack
requirements in theit program headers.
Only ET_DYN (PIE) format ELF files are supported, since loading at a fixed
virtual address is not possible on NOMMU.
This patch was developed and tested on J2 (SH2-compatible) but should
be usable immediately on all archs where binfmt_elf_fdpic is
available. Moreover, by providing dummy definitions of the
elf_check_fdpic() and elf_check_const_displacement() macros for archs
which lack an FDPIC ABI, it should be possible to enable building of
binfmt_elf_fdpic on all other NOMMU archs and thereby give them ELF
binary support, but I have not yet tested this.
The motivation for using binfmt_elf_fdpic.c rather than adapting
binfmt_elf.c to NOMMU is that the former already has all the necessary
code to work properly on NOMMU and has already received widespread
real-world use and testing. I hope this is not controversial.
I'm not really happy with having to unset the FDPIC_FUNCPTRS
personality bit when loading non-FDPIC ELF. This bit should really
reset automatically on execve, since otherwise, executing non-ELF
binaries (e.g. bFLT) from an FDPIC process will leave the personality
in the wrong state and severely break signal handling. But that's a
separate, existing bug and I don't know the right place to fix it.
Signed-off-by: Rich Felker <dalias@libc.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Oleg Endo <oleg.endo@t-online.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-10 06:59:01 +08:00
|
|
|
if (elf_check_fdpic(&exec_params.hdr))
|
2023-09-07 09:18:08 +08:00
|
|
|
current->personality |= PER_LINUX_FDPIC;
|
2010-01-07 01:23:17 +08:00
|
|
|
if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
|
|
|
|
current->personality |= READ_IMPLIES_EXEC;
|
Split 'flush_old_exec' into two functions
'flush_old_exec()' is the point of no return when doing an execve(), and
it is pretty badly misnamed. It doesn't just flush the old executable
environment, it also starts up the new one.
Which is very inconvenient for things like setting up the new
personality, because we want the new personality to affect the starting
of the new environment, but at the same time we do _not_ want the new
personality to take effect if flushing the old one fails.
As a result, the x86-64 '32-bit' personality is actually done using this
insane "I'm going to change the ABI, but I haven't done it yet" bit
(TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the
personality, but just the "pending" bit, so that "flush_thread()" can do
the actual personality magic.
This patch in no way changes any of that insanity, but it does split the
'flush_old_exec()' function up into a preparatory part that can fail
(still called flush_old_exec()), and a new part that will actually set
up the new exec environment (setup_new_exec()). All callers are changed
to trivially comply with the new world order.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-01-29 14:14:42 +08:00
|
|
|
|
|
|
|
setup_new_exec(bprm);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
set_binfmt(&elf_fdpic_format);
|
|
|
|
|
|
|
|
current->mm->start_code = 0;
|
|
|
|
current->mm->end_code = 0;
|
|
|
|
current->mm->start_stack = 0;
|
|
|
|
current->mm->start_data = 0;
|
|
|
|
current->mm->end_data = 0;
|
|
|
|
current->mm->context.exec_fdpic_loadmap = 0;
|
|
|
|
current->mm->context.interp_fdpic_loadmap = 0;
|
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
elf_fdpic_arch_lay_out_mm(&exec_params,
|
|
|
|
&interp_params,
|
|
|
|
¤t->mm->start_stack,
|
|
|
|
¤t->mm->start_brk);
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
retval = setup_arg_pages(bprm, current->mm->start_stack,
|
|
|
|
executable_stack);
|
2014-05-05 08:11:36 +08:00
|
|
|
if (retval < 0)
|
|
|
|
goto error;
|
2017-08-11 12:53:39 +08:00
|
|
|
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
|
|
|
|
retval = arch_setup_additional_pages(bprm, !!interpreter_name);
|
|
|
|
if (retval < 0)
|
|
|
|
goto error;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* load the executable and interpreter into memory */
|
2006-07-10 19:44:53 +08:00
|
|
|
retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm,
|
|
|
|
"executable");
|
2005-04-17 06:20:36 +08:00
|
|
|
if (retval < 0)
|
2014-05-05 08:11:36 +08:00
|
|
|
goto error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (interpreter_name) {
|
|
|
|
retval = elf_fdpic_map_file(&interp_params, interpreter,
|
|
|
|
current->mm, "interpreter");
|
|
|
|
if (retval < 0) {
|
|
|
|
printk(KERN_ERR "Unable to load interpreter\n");
|
2014-05-05 08:11:36 +08:00
|
|
|
goto error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
fput(interpreter);
|
|
|
|
interpreter = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
if (!current->mm->start_brk)
|
|
|
|
current->mm->start_brk = current->mm->end_data;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
current->mm->brk = current->mm->start_brk =
|
|
|
|
PAGE_ALIGN(current->mm->start_brk);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#else
|
2015-08-21 03:11:06 +08:00
|
|
|
/* create a stack area and zero-size brk area */
|
2005-04-17 06:20:36 +08:00
|
|
|
stack_size = (stack_size + PAGE_SIZE - 1) & PAGE_MASK;
|
|
|
|
if (stack_size < PAGE_SIZE * 2)
|
|
|
|
stack_size = PAGE_SIZE * 2;
|
|
|
|
|
2010-01-07 01:23:17 +08:00
|
|
|
stack_prot = PROT_READ | PROT_WRITE;
|
|
|
|
if (executable_stack == EXSTACK_ENABLE_X ||
|
|
|
|
(executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
|
|
|
|
stack_prot |= PROT_EXEC;
|
|
|
|
|
2012-04-21 08:13:58 +08:00
|
|
|
current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
|
2009-12-15 10:00:02 +08:00
|
|
|
MAP_PRIVATE | MAP_ANONYMOUS |
|
|
|
|
MAP_UNINITIALIZED | MAP_GROWSDOWN,
|
2005-04-17 06:20:36 +08:00
|
|
|
0);
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
if (IS_ERR_VALUE(current->mm->start_brk)) {
|
2005-04-17 06:20:36 +08:00
|
|
|
retval = current->mm->start_brk;
|
|
|
|
current->mm->start_brk = 0;
|
2014-05-05 08:11:36 +08:00
|
|
|
goto error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
current->mm->brk = current->mm->start_brk;
|
|
|
|
current->mm->context.end_brk = current->mm->start_brk;
|
|
|
|
current->mm->start_stack = current->mm->start_brk + stack_size;
|
|
|
|
#endif
|
|
|
|
|
2022-12-02 09:41:01 +08:00
|
|
|
retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
|
|
|
|
&interp_params);
|
|
|
|
if (retval < 0)
|
2014-05-05 08:11:36 +08:00
|
|
|
goto error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
kdebug("- start_code %lx", current->mm->start_code);
|
|
|
|
kdebug("- end_code %lx", current->mm->end_code);
|
|
|
|
kdebug("- start_data %lx", current->mm->start_data);
|
|
|
|
kdebug("- end_data %lx", current->mm->end_data);
|
|
|
|
kdebug("- start_brk %lx", current->mm->start_brk);
|
|
|
|
kdebug("- brk %lx", current->mm->brk);
|
|
|
|
kdebug("- start_stack %lx", current->mm->start_stack);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifdef ELF_FDPIC_PLAT_INIT
|
|
|
|
/*
|
|
|
|
* The ABI may specify that certain registers be set up in special
|
|
|
|
* ways (on i386 %edx is the address of a DT_FINI function, for
|
|
|
|
* example. This macro performs whatever initialization to
|
|
|
|
* the regs structure is required.
|
|
|
|
*/
|
2006-07-10 19:44:53 +08:00
|
|
|
dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr;
|
|
|
|
ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr,
|
|
|
|
dynaddr);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2018-04-11 07:34:57 +08:00
|
|
|
finalize_exec(bprm);
|
2005-04-17 06:20:36 +08:00
|
|
|
/* everything is now ready... get the userspace context ready to roll */
|
2006-07-10 19:44:53 +08:00
|
|
|
entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
|
|
|
|
start_thread(regs, entryaddr, current->mm->start_stack);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
retval = 0;
|
|
|
|
|
|
|
|
error:
|
fs: don't block i_writecount during exec
Back in 2021 we already discussed removing deny_write_access() for
executables. Back then I was hesistant because I thought that this might
cause issues in userspace. But even back then I had started taking some
notes on what could potentially depend on this and I didn't come up with
a lot so I've changed my mind and I would like to try this.
Here are some of the notes that I took:
(1) The deny_write_access() mechanism is causing really pointless issues
such as [1]. If a thread in a thread-group opens a file writable,
then writes some stuff, then closing the file descriptor and then
calling execve() they can fail the execve() with ETXTBUSY because
another thread in the thread-group could have concurrently called
fork(). Multi-threaded libraries such as go suffer from this.
(2) There are userspace attacks that rely on overwriting the binary of a
running process. These attacks are _mitigated_ but _not at all
prevented_ from ocurring by the deny_write_access() mechanism.
I'll go over some details. The clearest example of such attacks was
the attack against runC in CVE-2019-5736 (cf. [3]).
An attack could compromise the runC host binary from inside a
_privileged_ runC container. The malicious binary could then be used
to take over the host.
(It is crucial to note that this attack is _not_ possible with
unprivileged containers. IOW, the setup here is already insecure.)
The attack can be made when attaching to a running container or when
starting a container running a specially crafted image. For example,
when runC attaches to a container the attacker can trick it into
executing itself.
This could be done by replacing the target binary inside the
container with a custom binary pointing back at the runC binary
itself. As an example, if the target binary was /bin/bash, this
could be replaced with an executable script specifying the
interpreter path #!/proc/self/exe.
As such when /bin/bash is executed inside the container, instead the
target of /proc/self/exe will be executed. That magic link will
point to the runc binary on the host. The attacker can then proceed
to write to the target of /proc/self/exe to try and overwrite the
runC binary on the host.
However, this will not succeed because of deny_write_access(). Now,
one might think that this would prevent the attack but it doesn't.
To overcome this, the attacker has multiple ways:
* Open a file descriptor to /proc/self/exe using the O_PATH flag and
then proceed to reopen the binary as O_WRONLY through
/proc/self/fd/<nr> and try to write to it in a busy loop from a
separate process. Ultimately it will succeed when the runC binary
exits. After this the runC binary is compromised and can be used
to attack other containers or the host itself.
* Use a malicious shared library annotating a function in there with
the constructor attribute making the malicious function run as an
initializor. The malicious library will then open /proc/self/exe
for creating a new entry under /proc/self/fd/<nr>. It'll then call
exec to a) force runC to exit and b) hand the file descriptor off
to a program that then reopens /proc/self/fd/<nr> for writing
(which is now possible because runC has exited) and overwriting
that binary.
To sum up: the deny_write_access() mechanism doesn't prevent such
attacks in insecure setups. It just makes them minimally harder.
That's all.
The only way back then to prevent this is to create a temporary copy
of the calling binary itself when it starts or attaches to
containers. So what I did back then for LXC (and Aleksa for runC)
was to create an anonymous, in-memory file using the memfd_create()
system call and to copy itself into the temporary in-memory file,
which is then sealed to prevent further modifications. This sealed,
in-memory file copy is then executed instead of the original on-disk
binary.
Any compromising write operations from a privileged container to the
host binary will then write to the temporary in-memory binary and
not to the host binary on-disk, preserving the integrity of the host
binary. Also as the temporary, in-memory binary is sealed, writes to
this will also fail.
The point is that deny_write_access() is uselss to prevent these
attacks.
(3) Denying write access to an inode because it's currently used in an
exec path could easily be done on an LSM level. It might need an
additional hook but that should be about it.
(4) The MAP_DENYWRITE flag for mmap() has been deprecated a long time
ago so while we do protect the main executable the bigger portion of
the things you'd think need protecting such as the shared libraries
aren't. IOW, we let anyone happily overwrite shared libraries.
(5) We removed all remaining uses of VM_DENYWRITE in [2]. That means:
(5.1) We removed the legacy uselib() protection for preventing
overwriting of shared libraries. Nobody cared in 3 years.
(5.2) We allow write access to the elf interpreter after exec
completed treating it on a par with shared libraries.
Yes, someone in userspace could potentially be relying on this. It's not
completely out of the realm of possibility but let's find out if that's
actually the case and not guess.
Link: https://github.com/golang/go/issues/22315 [1]
Link: 49624efa65ac ("Merge tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux") [2]
Link: https://unit42.paloaltonetworks.com/breaking-docker-via-runc-explaining-cve-2019-5736 [3]
Link: https://lwn.net/Articles/866493
Link: https://github.com/golang/go/issues/22220
Link: https://github.com/golang/go/blob/5bf8c0cf09ee5c7e5a37ab90afcce154ab716a97/src/cmd/go/internal/work/buildid.go#L724
Link: https://github.com/golang/go/blob/5bf8c0cf09ee5c7e5a37ab90afcce154ab716a97/src/cmd/go/internal/work/exec.go#L1493
Link: https://github.com/golang/go/blob/5bf8c0cf09ee5c7e5a37ab90afcce154ab716a97/src/cmd/go/internal/script/cmds.go#L457
Link: https://github.com/golang/go/blob/5bf8c0cf09ee5c7e5a37ab90afcce154ab716a97/src/cmd/go/internal/test/test.go#L1557
Link: https://github.com/golang/go/blob/5bf8c0cf09ee5c7e5a37ab90afcce154ab716a97/src/os/exec/lp_linux_test.go#L61
Link: https://github.com/buildkite/agent/pull/2736
Link: https://github.com/rust-lang/rust/issues/114554
Link: https://bugs.openjdk.org/browse/JDK-8068370
Link: https://github.com/dotnet/runtime/issues/58964
Link: https://lore.kernel.org/r/20240531-vfs-i_writecount-v1-1-a17bea7ee36b@kernel.org
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-05-31 21:01:43 +08:00
|
|
|
if (interpreter)
|
2005-04-17 06:20:36 +08:00
|
|
|
fput(interpreter);
|
2005-11-07 17:01:34 +08:00
|
|
|
kfree(interpreter_name);
|
|
|
|
kfree(exec_params.phdrs);
|
|
|
|
kfree(exec_params.loadmap);
|
|
|
|
kfree(interp_params.phdrs);
|
|
|
|
kfree(interp_params.loadmap);
|
2005-04-17 06:20:36 +08:00
|
|
|
return retval;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
2008-10-16 13:04:15 +08:00
|
|
|
|
|
|
|
#ifndef ELF_BASE_PLATFORM
|
|
|
|
/*
|
|
|
|
* AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
|
|
|
|
* If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
|
|
|
|
* will be copied to the user stack in the same manner as AT_PLATFORM.
|
|
|
|
*/
|
|
|
|
#define ELF_BASE_PLATFORM NULL
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2008-10-16 13:04:15 +08:00
|
|
|
* present useful information to the program by shovelling it onto the new
|
|
|
|
* process's stack
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
static int create_elf_fdpic_tables(struct linux_binprm *bprm,
|
|
|
|
struct mm_struct *mm,
|
|
|
|
struct elf_fdpic_params *exec_params,
|
|
|
|
struct elf_fdpic_params *interp_params)
|
|
|
|
{
|
2008-11-14 07:39:18 +08:00
|
|
|
const struct cred *cred = current_cred();
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long sp, csp, nitems;
|
2006-06-23 17:04:05 +08:00
|
|
|
elf_caddr_t __user *argv, *envp;
|
2005-04-17 06:20:36 +08:00
|
|
|
size_t platform_len = 0, len;
|
2008-10-16 13:04:15 +08:00
|
|
|
char *k_platform, *k_base_platform;
|
|
|
|
char __user *u_platform, *u_base_platform, *p;
|
2005-04-17 06:20:36 +08:00
|
|
|
int loop;
|
2020-01-28 21:25:39 +08:00
|
|
|
unsigned long flags = 0;
|
2024-03-23 03:54:18 +08:00
|
|
|
int ei_index;
|
|
|
|
elf_addr_t *elf_info;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
2008-10-16 13:04:15 +08:00
|
|
|
/* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
|
|
|
|
* by the processes running on the same package. One thing we can do is
|
|
|
|
* to shuffle the initial stack for them, so we give the architecture
|
|
|
|
* an opportunity to do so here.
|
|
|
|
*/
|
|
|
|
sp = arch_align_stack(bprm->p);
|
2005-04-17 06:20:36 +08:00
|
|
|
#else
|
|
|
|
sp = mm->start_stack;
|
|
|
|
|
|
|
|
/* stack the program arguments and environment */
|
2016-07-24 23:30:18 +08:00
|
|
|
if (transfer_args_to_stack(bprm, &sp) < 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
2016-07-24 23:30:18 +08:00
|
|
|
sp &= ~15;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2008-10-16 13:04:15 +08:00
|
|
|
/*
|
|
|
|
* If this architecture has a platform capability string, copy it
|
|
|
|
* to userspace. In some cases (Sparc), this info is impossible
|
|
|
|
* for userspace to get any other way, in others (i386) it is
|
|
|
|
* merely difficult.
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
k_platform = ELF_PLATFORM;
|
2006-07-10 19:44:50 +08:00
|
|
|
u_platform = NULL;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (k_platform) {
|
|
|
|
platform_len = strlen(k_platform) + 1;
|
|
|
|
sp -= platform_len;
|
2006-06-23 17:04:05 +08:00
|
|
|
u_platform = (char __user *) sp;
|
2020-02-19 22:28:34 +08:00
|
|
|
if (copy_to_user(u_platform, k_platform, platform_len) != 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
2008-10-16 13:04:15 +08:00
|
|
|
/*
|
|
|
|
* If this architecture has a "base" platform capability
|
|
|
|
* string, copy it to userspace.
|
|
|
|
*/
|
|
|
|
k_base_platform = ELF_BASE_PLATFORM;
|
|
|
|
u_base_platform = NULL;
|
|
|
|
|
|
|
|
if (k_base_platform) {
|
|
|
|
platform_len = strlen(k_base_platform) + 1;
|
|
|
|
sp -= platform_len;
|
|
|
|
u_base_platform = (char __user *) sp;
|
2020-02-19 22:28:34 +08:00
|
|
|
if (copy_to_user(u_base_platform, k_base_platform, platform_len) != 0)
|
2008-10-16 13:04:15 +08:00
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
sp &= ~7UL;
|
|
|
|
|
|
|
|
/* stack the load map(s) */
|
2023-07-11 21:07:53 +08:00
|
|
|
len = sizeof(struct elf_fdpic_loadmap);
|
|
|
|
len += sizeof(struct elf_fdpic_loadseg) * exec_params->loadmap->nsegs;
|
2005-04-17 06:20:36 +08:00
|
|
|
sp = (sp - len) & ~7UL;
|
|
|
|
exec_params->map_addr = sp;
|
|
|
|
|
2006-06-23 17:04:05 +08:00
|
|
|
if (copy_to_user((void __user *) sp, exec_params->loadmap, len) != 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
current->mm->context.exec_fdpic_loadmap = (unsigned long) sp;
|
|
|
|
|
|
|
|
if (interp_params->loadmap) {
|
2023-07-11 21:07:53 +08:00
|
|
|
len = sizeof(struct elf_fdpic_loadmap);
|
|
|
|
len += sizeof(struct elf_fdpic_loadseg) *
|
2006-07-10 19:44:53 +08:00
|
|
|
interp_params->loadmap->nsegs;
|
2005-04-17 06:20:36 +08:00
|
|
|
sp = (sp - len) & ~7UL;
|
|
|
|
interp_params->map_addr = sp;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
if (copy_to_user((void __user *) sp, interp_params->loadmap,
|
|
|
|
len) != 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* force 16 byte _final_ alignment here for generality */
|
2008-10-16 13:04:16 +08:00
|
|
|
#define DLINFO_ITEMS 15
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-10-16 13:04:15 +08:00
|
|
|
nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) +
|
|
|
|
(k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-05-27 21:49:01 +08:00
|
|
|
if (bprm->have_execfd)
|
2008-10-16 13:04:16 +08:00
|
|
|
nitems++;
|
2024-08-26 11:27:45 +08:00
|
|
|
#ifdef ELF_HWCAP2
|
|
|
|
nitems++;
|
|
|
|
#endif
|
2008-10-16 13:04:16 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
csp = sp;
|
|
|
|
sp -= nitems * 2 * sizeof(unsigned long);
|
|
|
|
sp -= (bprm->envc + 1) * sizeof(char *); /* envv[] */
|
|
|
|
sp -= (bprm->argc + 1) * sizeof(char *); /* argv[] */
|
|
|
|
sp -= 1 * sizeof(unsigned long); /* argc */
|
|
|
|
|
|
|
|
csp -= sp & 15UL;
|
|
|
|
sp -= sp & 15UL;
|
|
|
|
|
2024-03-23 03:54:18 +08:00
|
|
|
/* Create the ELF interpreter info */
|
|
|
|
elf_info = (elf_addr_t *)mm->saved_auxv;
|
|
|
|
/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
|
|
|
|
#define NEW_AUX_ENT(id, val) \
|
|
|
|
do { \
|
|
|
|
*elf_info++ = id; \
|
|
|
|
*elf_info++ = val; \
|
2005-04-17 06:20:36 +08:00
|
|
|
} while (0)
|
|
|
|
|
2024-03-23 03:54:18 +08:00
|
|
|
#ifdef ARCH_DLINFO
|
|
|
|
/*
|
|
|
|
* ARCH_DLINFO must come first so PPC can do its special alignment of
|
|
|
|
* AUXV.
|
|
|
|
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
|
|
|
|
* ARCH_DLINFO changes
|
|
|
|
*/
|
|
|
|
ARCH_DLINFO;
|
|
|
|
#endif
|
2013-04-18 01:33:11 +08:00
|
|
|
NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
|
|
|
|
#ifdef ELF_HWCAP2
|
|
|
|
NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
|
|
|
|
#endif
|
binfmt_elf_fdpic: Magical stack pointer index, for NEW_AUX_ENT compat.
While implementing binfmt_elf_fdpic on SH it quickly became apparent
that SH was the first platform to support both binfmt_elf_fdpic and
binfmt_elf, as well as the only of the FDPIC platforms to make use of the
auxvt.
Currently binfmt_elf_fdpic uses a special version of NEW_AUX_ENT() where
the first argument is the entry displacement after csp has been adjusted,
being reset after each adjustment. As we have no ability to sort this out
through the platform's ARCH_DLINFO, this index needs to be managed
entirely in create_elf_fdpic_tables(). Presently none of the platforms
that set their own auxvt entries are able to do so through their
respective ARCH_DLINFOs when using binfmt_elf_fdpic.
In addition to this, binfmt_elf_fdpic has been looking at
DLINFO_ARCH_ITEMS for the number of architecture-specific entries in the
auxvt. This is legacy cruft, and is not defined by any platforms in-tree,
even those that make heavy use of the auxvt. AT_VECTOR_SIZE_ARCH is
always available, and contains the number that is of interest here, so we
switch to using that unconditionally as well.
As this has direct bearing on how much stack is used, platforms that have
configurable (or dynamically adjustable) NEW_AUX_ENT calls need to either
make AT_VECTOR_SIZE_ARCH more fine-grained, or leave it as a worst-case
and live with some lost stack space if those entries aren't pushed (some
platforms may also need to purposely sacrifice some space here for
alignment considerations, as noted in the code -- although not an issue
for any FDPIC-capable platform today).
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: David Howells <dhowells@redhat.com>
2008-05-19 12:34:45 +08:00
|
|
|
NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE);
|
|
|
|
NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
|
|
|
|
NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr);
|
|
|
|
NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
|
|
|
|
NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum);
|
|
|
|
NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
|
2020-01-28 21:25:39 +08:00
|
|
|
if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
|
|
|
|
flags |= AT_FLAGS_PRESERVE_ARGV0;
|
|
|
|
NEW_AUX_ENT(AT_FLAGS, flags);
|
binfmt_elf_fdpic: Magical stack pointer index, for NEW_AUX_ENT compat.
While implementing binfmt_elf_fdpic on SH it quickly became apparent
that SH was the first platform to support both binfmt_elf_fdpic and
binfmt_elf, as well as the only of the FDPIC platforms to make use of the
auxvt.
Currently binfmt_elf_fdpic uses a special version of NEW_AUX_ENT() where
the first argument is the entry displacement after csp has been adjusted,
being reset after each adjustment. As we have no ability to sort this out
through the platform's ARCH_DLINFO, this index needs to be managed
entirely in create_elf_fdpic_tables(). Presently none of the platforms
that set their own auxvt entries are able to do so through their
respective ARCH_DLINFOs when using binfmt_elf_fdpic.
In addition to this, binfmt_elf_fdpic has been looking at
DLINFO_ARCH_ITEMS for the number of architecture-specific entries in the
auxvt. This is legacy cruft, and is not defined by any platforms in-tree,
even those that make heavy use of the auxvt. AT_VECTOR_SIZE_ARCH is
always available, and contains the number that is of interest here, so we
switch to using that unconditionally as well.
As this has direct bearing on how much stack is used, platforms that have
configurable (or dynamically adjustable) NEW_AUX_ENT calls need to either
make AT_VECTOR_SIZE_ARCH more fine-grained, or leave it as a worst-case
and live with some lost stack space if those entries aren't pushed (some
platforms may also need to purposely sacrifice some space here for
alignment considerations, as noted in the code -- although not an issue
for any FDPIC-capable platform today).
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: David Howells <dhowells@redhat.com>
2008-05-19 12:34:45 +08:00
|
|
|
NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
|
2012-02-08 10:36:10 +08:00
|
|
|
NEW_AUX_ENT(AT_UID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->uid));
|
|
|
|
NEW_AUX_ENT(AT_EUID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->euid));
|
|
|
|
NEW_AUX_ENT(AT_GID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->gid));
|
|
|
|
NEW_AUX_ENT(AT_EGID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->egid));
|
binfmt: Introduce secureexec flag
The bprm_secureexec hook can be moved earlier. Right now, it is called
during create_elf_tables(), via load_binary(), via search_binary_handler(),
via exec_binprm(). Nearly all (see exception below) state used by
bprm_secureexec is created during the bprm_set_creds hook, called from
prepare_binprm().
For all LSMs (except commoncaps described next), only the first execution
of bprm_set_creds takes any effect (they all check bprm->called_set_creds
which prepare_binprm() sets after the first call to the bprm_set_creds
hook). However, all these LSMs also only do anything with bprm_secureexec
when they detected a secure state during their first run of bprm_set_creds.
Therefore, it is functionally identical to move the detection into
bprm_set_creds, since the results from secureexec here only need to be
based on the first call to the LSM's bprm_set_creds hook.
The single exception is that the commoncaps secureexec hook also examines
euid/uid and egid/gid differences which are controlled by bprm_fill_uid(),
via prepare_binprm(), which can be called multiple times (e.g.
binfmt_script, binfmt_misc), and may clear the euid/egid for the final
load (i.e. the script interpreter). However, while commoncaps specifically
ignores bprm->cred_prepared, and runs its bprm_set_creds hook each time
prepare_binprm() may get called, it needs to base the secureexec decision
on the final call to bprm_set_creds. As a result, it will need special
handling.
To begin this refactoring, this adds the secureexec flag to the bprm
struct, and calls the secureexec hook during setup_new_exec(). This is
safe since all the cred work is finished (and past the point of no return).
This explicit call will be removed in later patches once the hook has been
removed.
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: John Johansen <john.johansen@canonical.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: James Morris <james.l.morris@oracle.com>
2017-07-19 06:25:22 +08:00
|
|
|
NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
|
2008-10-16 13:04:16 +08:00
|
|
|
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
|
2024-03-23 03:54:18 +08:00
|
|
|
if (k_platform)
|
|
|
|
NEW_AUX_ENT(AT_PLATFORM,
|
|
|
|
(elf_addr_t)(unsigned long)u_platform);
|
|
|
|
if (k_base_platform)
|
|
|
|
NEW_AUX_ENT(AT_BASE_PLATFORM,
|
|
|
|
(elf_addr_t)(unsigned long)u_base_platform);
|
|
|
|
if (bprm->have_execfd)
|
|
|
|
NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
|
|
|
|
#undef NEW_AUX_ENT
|
|
|
|
/* AT_NULL is zero; clear the rest too */
|
|
|
|
memset(elf_info, 0, (char *)mm->saved_auxv +
|
|
|
|
sizeof(mm->saved_auxv) - (char *)elf_info);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2024-03-23 03:54:18 +08:00
|
|
|
/* And advance past the AT_NULL entry. */
|
|
|
|
elf_info += 2;
|
binfmt_elf_fdpic: Magical stack pointer index, for NEW_AUX_ENT compat.
While implementing binfmt_elf_fdpic on SH it quickly became apparent
that SH was the first platform to support both binfmt_elf_fdpic and
binfmt_elf, as well as the only of the FDPIC platforms to make use of the
auxvt.
Currently binfmt_elf_fdpic uses a special version of NEW_AUX_ENT() where
the first argument is the entry displacement after csp has been adjusted,
being reset after each adjustment. As we have no ability to sort this out
through the platform's ARCH_DLINFO, this index needs to be managed
entirely in create_elf_fdpic_tables(). Presently none of the platforms
that set their own auxvt entries are able to do so through their
respective ARCH_DLINFOs when using binfmt_elf_fdpic.
In addition to this, binfmt_elf_fdpic has been looking at
DLINFO_ARCH_ITEMS for the number of architecture-specific entries in the
auxvt. This is legacy cruft, and is not defined by any platforms in-tree,
even those that make heavy use of the auxvt. AT_VECTOR_SIZE_ARCH is
always available, and contains the number that is of interest here, so we
switch to using that unconditionally as well.
As this has direct bearing on how much stack is used, platforms that have
configurable (or dynamically adjustable) NEW_AUX_ENT calls need to either
make AT_VECTOR_SIZE_ARCH more fine-grained, or leave it as a worst-case
and live with some lost stack space if those entries aren't pushed (some
platforms may also need to purposely sacrifice some space here for
alignment considerations, as noted in the code -- although not an issue
for any FDPIC-capable platform today).
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: David Howells <dhowells@redhat.com>
2008-05-19 12:34:45 +08:00
|
|
|
|
2024-03-23 03:54:18 +08:00
|
|
|
ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
|
|
|
|
csp -= ei_index * sizeof(elf_addr_t);
|
|
|
|
|
|
|
|
/* Put the elf_info on the stack in the right place. */
|
|
|
|
if (copy_to_user((void __user *)csp, mm->saved_auxv,
|
|
|
|
ei_index * sizeof(elf_addr_t)))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* allocate room for argv[] and envv[] */
|
|
|
|
csp -= (bprm->envc + 1) * sizeof(elf_caddr_t);
|
2006-06-23 17:04:05 +08:00
|
|
|
envp = (elf_caddr_t __user *) csp;
|
2005-04-17 06:20:36 +08:00
|
|
|
csp -= (bprm->argc + 1) * sizeof(elf_caddr_t);
|
2006-06-23 17:04:05 +08:00
|
|
|
argv = (elf_caddr_t __user *) csp;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* stack argc */
|
|
|
|
csp -= sizeof(unsigned long);
|
2020-02-19 22:28:34 +08:00
|
|
|
if (put_user(bprm->argc, (unsigned long __user *) csp))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-03-25 01:38:48 +08:00
|
|
|
BUG_ON(csp != sp);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* fill in the argv[] array */
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
current->mm->arg_start = bprm->p;
|
|
|
|
#else
|
2006-07-10 19:44:53 +08:00
|
|
|
current->mm->arg_start = current->mm->start_stack -
|
|
|
|
(MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2006-06-23 17:04:05 +08:00
|
|
|
p = (char __user *) current->mm->arg_start;
|
2005-04-17 06:20:36 +08:00
|
|
|
for (loop = bprm->argc; loop > 0; loop--) {
|
2020-02-19 22:28:34 +08:00
|
|
|
if (put_user((elf_caddr_t) p, argv++))
|
|
|
|
return -EFAULT;
|
2007-07-19 16:48:16 +08:00
|
|
|
len = strnlen_user(p, MAX_ARG_STRLEN);
|
|
|
|
if (!len || len > MAX_ARG_STRLEN)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
p += len;
|
|
|
|
}
|
2020-02-19 22:28:34 +08:00
|
|
|
if (put_user(NULL, argv))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
current->mm->arg_end = (unsigned long) p;
|
|
|
|
|
|
|
|
/* fill in the envv[] array */
|
|
|
|
current->mm->env_start = (unsigned long) p;
|
|
|
|
for (loop = bprm->envc; loop > 0; loop--) {
|
2020-02-19 22:28:34 +08:00
|
|
|
if (put_user((elf_caddr_t)(unsigned long) p, envp++))
|
|
|
|
return -EFAULT;
|
2007-07-19 16:48:16 +08:00
|
|
|
len = strnlen_user(p, MAX_ARG_STRLEN);
|
|
|
|
if (!len || len > MAX_ARG_STRLEN)
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
p += len;
|
|
|
|
}
|
2020-02-19 22:28:34 +08:00
|
|
|
if (put_user(NULL, envp))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
current->mm->env_end = (unsigned long) p;
|
|
|
|
|
|
|
|
mm->start_stack = (unsigned long) sp;
|
|
|
|
return 0;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* load the appropriate binary image (executable or interpreter) into memory
|
|
|
|
* - we assume no MMU is available
|
|
|
|
* - if no other PIC bits are set in params->hdr->e_flags
|
|
|
|
* - we assume that the LOADable segments in the binary are independently relocatable
|
|
|
|
* - we assume R/O executable segments are shareable
|
|
|
|
* - else
|
|
|
|
* - we assume the loadable parts of the image to require fixed displacement
|
|
|
|
* - the image is not shareable
|
|
|
|
*/
|
|
|
|
static int elf_fdpic_map_file(struct elf_fdpic_params *params,
|
|
|
|
struct file *file,
|
|
|
|
struct mm_struct *mm,
|
|
|
|
const char *what)
|
|
|
|
{
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_fdpic_loadmap *loadmap;
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef CONFIG_MMU
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_fdpic_loadseg *mseg;
|
2023-05-29 00:20:25 +08:00
|
|
|
unsigned long load_addr;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_fdpic_loadseg *seg;
|
|
|
|
struct elf_phdr *phdr;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned nloads, tmp;
|
2023-05-29 00:20:25 +08:00
|
|
|
unsigned long stop;
|
2005-04-17 06:20:36 +08:00
|
|
|
int loop, ret;
|
|
|
|
|
|
|
|
/* allocate a load map table */
|
|
|
|
nloads = 0;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++)
|
|
|
|
if (params->phdrs[loop].p_type == PT_LOAD)
|
|
|
|
nloads++;
|
|
|
|
|
|
|
|
if (nloads == 0)
|
|
|
|
return -ELIBBAD;
|
|
|
|
|
2023-05-29 00:20:24 +08:00
|
|
|
loadmap = kzalloc(struct_size(loadmap, segs, nloads), GFP_KERNEL);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!loadmap)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
params->loadmap = loadmap;
|
|
|
|
|
2023-07-11 21:07:53 +08:00
|
|
|
loadmap->version = ELF_FDPIC_LOADMAP_VERSION;
|
2005-04-17 06:20:36 +08:00
|
|
|
loadmap->nsegs = nloads;
|
|
|
|
|
|
|
|
/* map the requested LOADs into the memory space */
|
|
|
|
switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
|
|
|
|
case ELF_FDPIC_FLAG_CONSTDISP:
|
|
|
|
case ELF_FDPIC_FLAG_CONTIGUOUS:
|
|
|
|
#ifndef CONFIG_MMU
|
|
|
|
ret = elf_fdpic_map_file_constdisp_on_uclinux(params, file, mm);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
ret = elf_fdpic_map_file_by_direct_mmap(params, file, mm);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* map the entry point */
|
|
|
|
if (params->hdr.e_entry) {
|
|
|
|
seg = loadmap->segs;
|
|
|
|
for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
|
|
|
|
if (params->hdr.e_entry >= seg->p_vaddr &&
|
2006-07-10 19:44:53 +08:00
|
|
|
params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) {
|
2005-04-17 06:20:36 +08:00
|
|
|
params->entry_addr =
|
2006-07-10 19:44:53 +08:00
|
|
|
(params->hdr.e_entry - seg->p_vaddr) +
|
|
|
|
seg->addr;
|
2005-04-17 06:20:36 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* determine where the program header table has wound up if mapped */
|
2006-07-10 19:44:53 +08:00
|
|
|
stop = params->hdr.e_phoff;
|
|
|
|
stop += params->hdr.e_phnum * sizeof (struct elf_phdr);
|
2005-04-17 06:20:36 +08:00
|
|
|
phdr = params->phdrs;
|
|
|
|
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
if (phdr->p_type != PT_LOAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (phdr->p_offset > params->hdr.e_phoff ||
|
|
|
|
phdr->p_offset + phdr->p_filesz < stop)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
seg = loadmap->segs;
|
|
|
|
for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
|
|
|
|
if (phdr->p_vaddr >= seg->p_vaddr &&
|
2006-07-10 19:44:53 +08:00
|
|
|
phdr->p_vaddr + phdr->p_filesz <=
|
|
|
|
seg->p_vaddr + seg->p_memsz) {
|
|
|
|
params->ph_addr =
|
|
|
|
(phdr->p_vaddr - seg->p_vaddr) +
|
|
|
|
seg->addr +
|
2005-04-17 06:20:36 +08:00
|
|
|
params->hdr.e_phoff - phdr->p_offset;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* determine where the dynamic section has wound up if there is one */
|
|
|
|
phdr = params->phdrs;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
if (phdr->p_type != PT_DYNAMIC)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
seg = loadmap->segs;
|
|
|
|
for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
|
|
|
|
if (phdr->p_vaddr >= seg->p_vaddr &&
|
2006-07-10 19:44:53 +08:00
|
|
|
phdr->p_vaddr + phdr->p_memsz <=
|
|
|
|
seg->p_vaddr + seg->p_memsz) {
|
2023-07-11 21:07:53 +08:00
|
|
|
Elf_Dyn __user *dyn;
|
|
|
|
Elf_Sword d_tag;
|
2017-08-17 06:56:14 +08:00
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
params->dynamic_addr =
|
|
|
|
(phdr->p_vaddr - seg->p_vaddr) +
|
|
|
|
seg->addr;
|
|
|
|
|
|
|
|
/* check the dynamic section contains at least
|
|
|
|
* one item, and that the last item is a NULL
|
|
|
|
* entry */
|
2005-04-17 06:20:36 +08:00
|
|
|
if (phdr->p_memsz == 0 ||
|
2023-07-11 21:07:53 +08:00
|
|
|
phdr->p_memsz % sizeof(Elf_Dyn) != 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
goto dynamic_error;
|
|
|
|
|
2023-07-11 21:07:53 +08:00
|
|
|
tmp = phdr->p_memsz / sizeof(Elf_Dyn);
|
|
|
|
dyn = (Elf_Dyn __user *)params->dynamic_addr;
|
2020-02-19 22:28:34 +08:00
|
|
|
if (get_user(d_tag, &dyn[tmp - 1].d_tag) ||
|
|
|
|
d_tag != 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
goto dynamic_error;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now elide adjacent segments in the load map on MMU linux
|
2006-07-10 19:44:53 +08:00
|
|
|
* - on uClinux the holes between may actually be filled with system
|
|
|
|
* stuff or stuff from other processes
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
nloads = loadmap->nsegs;
|
|
|
|
mseg = loadmap->segs;
|
|
|
|
seg = mseg + 1;
|
|
|
|
for (loop = 1; loop < nloads; loop++) {
|
|
|
|
/* see if we have a candidate for merging */
|
|
|
|
if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) {
|
|
|
|
load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz);
|
|
|
|
if (load_addr == (seg->addr & PAGE_MASK)) {
|
2006-07-10 19:44:53 +08:00
|
|
|
mseg->p_memsz +=
|
|
|
|
load_addr -
|
|
|
|
(mseg->addr + mseg->p_memsz);
|
2005-04-17 06:20:36 +08:00
|
|
|
mseg->p_memsz += seg->addr & ~PAGE_MASK;
|
|
|
|
mseg->p_memsz += seg->p_memsz;
|
|
|
|
loadmap->nsegs--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mseg++;
|
|
|
|
if (mseg != seg)
|
|
|
|
*mseg = *seg;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
kdebug("Mapped Object [%s]:", what);
|
|
|
|
kdebug("- elfhdr : %lx", params->elfhdr_addr);
|
|
|
|
kdebug("- entry : %lx", params->entry_addr);
|
|
|
|
kdebug("- PHDR[] : %lx", params->ph_addr);
|
|
|
|
kdebug("- DYNAMIC[]: %lx", params->dynamic_addr);
|
|
|
|
seg = loadmap->segs;
|
|
|
|
for (loop = 0; loop < loadmap->nsegs; loop++, seg++)
|
2023-09-27 21:29:33 +08:00
|
|
|
kdebug("- LOAD[%d] : %08llx-%08llx [va=%llx ms=%llx]",
|
2005-04-17 06:20:36 +08:00
|
|
|
loop,
|
2023-09-27 21:29:33 +08:00
|
|
|
(unsigned long long) seg->addr,
|
|
|
|
(unsigned long long) seg->addr + seg->p_memsz - 1,
|
|
|
|
(unsigned long long) seg->p_vaddr,
|
|
|
|
(unsigned long long) seg->p_memsz);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
dynamic_error:
|
2005-04-17 06:20:36 +08:00
|
|
|
printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
|
2013-01-24 06:07:38 +08:00
|
|
|
what, file_inode(file)->i_ino);
|
2005-04-17 06:20:36 +08:00
|
|
|
return -ELIBBAD;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* map a file with constant displacement under uClinux
|
|
|
|
*/
|
|
|
|
#ifndef CONFIG_MMU
|
2006-07-10 19:44:53 +08:00
|
|
|
static int elf_fdpic_map_file_constdisp_on_uclinux(
|
|
|
|
struct elf_fdpic_params *params,
|
|
|
|
struct file *file,
|
|
|
|
struct mm_struct *mm)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_fdpic_loadseg *seg;
|
|
|
|
struct elf_phdr *phdr;
|
2021-06-29 10:38:31 +08:00
|
|
|
unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
int loop, ret;
|
|
|
|
|
|
|
|
load_addr = params->load_addr;
|
|
|
|
seg = params->loadmap->segs;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
/* determine the bounds of the contiguous overall allocation we must
|
|
|
|
* make */
|
2005-04-17 06:20:36 +08:00
|
|
|
phdr = params->phdrs;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
if (params->phdrs[loop].p_type != PT_LOAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (base > phdr->p_vaddr)
|
|
|
|
base = phdr->p_vaddr;
|
|
|
|
if (top < phdr->p_vaddr + phdr->p_memsz)
|
|
|
|
top = phdr->p_vaddr + phdr->p_memsz;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* allocate one big anon block for everything */
|
2012-04-21 08:13:58 +08:00
|
|
|
maddr = vm_mmap(NULL, load_addr, top - base,
|
2021-06-29 10:38:31 +08:00
|
|
|
PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, 0);
|
2006-07-10 19:44:53 +08:00
|
|
|
if (IS_ERR_VALUE(maddr))
|
2005-04-17 06:20:36 +08:00
|
|
|
return (int) maddr;
|
|
|
|
|
|
|
|
if (load_addr != 0)
|
|
|
|
load_addr += PAGE_ALIGN(top - base);
|
|
|
|
|
|
|
|
/* and then load the file segments into it */
|
|
|
|
phdr = params->phdrs;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
if (params->phdrs[loop].p_type != PT_LOAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
seg->addr = maddr + (phdr->p_vaddr - base);
|
|
|
|
seg->p_vaddr = phdr->p_vaddr;
|
|
|
|
seg->p_memsz = phdr->p_memsz;
|
|
|
|
|
2013-04-14 08:31:37 +08:00
|
|
|
ret = read_code(file, seg->addr, phdr->p_offset,
|
|
|
|
phdr->p_filesz);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/* map the ELF header address if in this segment */
|
|
|
|
if (phdr->p_offset == 0)
|
|
|
|
params->elfhdr_addr = seg->addr;
|
|
|
|
|
|
|
|
/* clear any space allocated but not loaded */
|
2009-04-03 07:58:28 +08:00
|
|
|
if (phdr->p_filesz < phdr->p_memsz) {
|
2010-06-01 21:10:47 +08:00
|
|
|
if (clear_user((void *) (seg->addr + phdr->p_filesz),
|
|
|
|
phdr->p_memsz - phdr->p_filesz))
|
|
|
|
return -EFAULT;
|
2009-04-03 07:58:28 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (mm) {
|
|
|
|
if (phdr->p_flags & PF_X) {
|
2007-03-23 15:10:00 +08:00
|
|
|
if (!mm->start_code) {
|
|
|
|
mm->start_code = seg->addr;
|
|
|
|
mm->end_code = seg->addr +
|
|
|
|
phdr->p_memsz;
|
|
|
|
}
|
2006-07-10 19:44:53 +08:00
|
|
|
} else if (!mm->start_data) {
|
2005-04-17 06:20:36 +08:00
|
|
|
mm->start_data = seg->addr;
|
|
|
|
mm->end_data = seg->addr + phdr->p_memsz;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
seg++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* map a binary by direct mmap() of the individual PT_LOAD segments
|
|
|
|
*/
|
|
|
|
static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
|
|
|
|
struct file *file,
|
|
|
|
struct mm_struct *mm)
|
|
|
|
{
|
2023-07-11 21:07:53 +08:00
|
|
|
struct elf_fdpic_loadseg *seg;
|
|
|
|
struct elf_phdr *phdr;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long load_addr, delta_vaddr;
|
2010-06-01 21:10:47 +08:00
|
|
|
int loop, dvset;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
load_addr = params->load_addr;
|
|
|
|
delta_vaddr = 0;
|
|
|
|
dvset = 0;
|
|
|
|
|
|
|
|
seg = params->loadmap->segs;
|
|
|
|
|
|
|
|
/* deal with each load segment separately */
|
|
|
|
phdr = params->phdrs;
|
|
|
|
for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
|
|
|
|
unsigned long maddr, disp, excess, excess1;
|
|
|
|
int prot = 0, flags;
|
|
|
|
|
|
|
|
if (phdr->p_type != PT_LOAD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
kdebug("[LOAD] va=%lx of=%lx fs=%lx ms=%lx",
|
|
|
|
(unsigned long) phdr->p_vaddr,
|
|
|
|
(unsigned long) phdr->p_offset,
|
|
|
|
(unsigned long) phdr->p_filesz,
|
|
|
|
(unsigned long) phdr->p_memsz);
|
|
|
|
|
|
|
|
/* determine the mapping parameters */
|
|
|
|
if (phdr->p_flags & PF_R) prot |= PROT_READ;
|
|
|
|
if (phdr->p_flags & PF_W) prot |= PROT_WRITE;
|
|
|
|
if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
|
|
|
|
|
binfmt: remove in-tree usage of MAP_DENYWRITE
At exec time when we mmap the new executable via MAP_DENYWRITE we have it
opened via do_open_execat() and already deny_write_access()'ed the file
successfully. Once exec completes, we allow_write_acces(); however,
we set mm->exe_file in begin_new_exec() via set_mm_exe_file() and
also deny_write_access() as long as mm->exe_file remains set. We'll
effectively deny write access to our executable via mm->exe_file
until mm->exe_file is changed -- when the process is removed, on new
exec, or via sys_prctl(PR_SET_MM_MAP/EXE_FILE).
Let's remove all usage of MAP_DENYWRITE, it's no longer necessary for
mm->exe_file.
In case of an elf interpreter, we'll now only deny write access to the file
during exec. This is somewhat okay, because the interpreter behaves
(and sometime is) a shared library; all shared libraries, especially the
ones loaded directly in user space like via dlopen() won't ever be mapped
via MAP_DENYWRITE, because we ignore that from user space completely;
these shared libraries can always be modified while mapped and executed.
Let's only special-case the main executable, denying write access while
being executed by a process. This can be considered a minor user space
visible change.
While this is a cleanup, it also fixes part of a problem reported with
VM_DENYWRITE on overlayfs, as VM_DENYWRITE is effectively unused with
this patch and will be removed next:
"Overlayfs did not honor positive i_writecount on realfile for
VM_DENYWRITE mappings." [1]
[1] https://lore.kernel.org/r/YNHXzBgzRrZu1MrD@miu.piliscsaba.redhat.com/
Reported-by: Chengguang Xu <cgxu519@mykernel.net>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2021-04-23 15:42:41 +08:00
|
|
|
flags = MAP_PRIVATE;
|
2005-04-17 06:20:36 +08:00
|
|
|
maddr = 0;
|
|
|
|
|
|
|
|
switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
|
|
|
|
case ELF_FDPIC_FLAG_INDEPENDENT:
|
|
|
|
/* PT_LOADs are independently locatable */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ELF_FDPIC_FLAG_HONOURVADDR:
|
|
|
|
/* the specified virtual address must be honoured */
|
|
|
|
maddr = phdr->p_vaddr;
|
|
|
|
flags |= MAP_FIXED;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ELF_FDPIC_FLAG_CONSTDISP:
|
|
|
|
/* constant displacement
|
2006-07-10 19:44:53 +08:00
|
|
|
* - can be mapped anywhere, but must be mapped as a
|
|
|
|
* unit
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
if (!dvset) {
|
|
|
|
maddr = load_addr;
|
|
|
|
delta_vaddr = phdr->p_vaddr;
|
|
|
|
dvset = 1;
|
2006-07-10 19:44:53 +08:00
|
|
|
} else {
|
2005-04-17 06:20:36 +08:00
|
|
|
maddr = load_addr + phdr->p_vaddr - delta_vaddr;
|
|
|
|
flags |= MAP_FIXED;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ELF_FDPIC_FLAG_CONTIGUOUS:
|
|
|
|
/* contiguity handled later */
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
|
|
|
|
maddr &= PAGE_MASK;
|
|
|
|
|
|
|
|
/* create the mapping */
|
|
|
|
disp = phdr->p_vaddr & ~PAGE_MASK;
|
2012-04-21 08:13:58 +08:00
|
|
|
maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
|
2005-04-17 06:20:36 +08:00
|
|
|
phdr->p_offset - disp);
|
|
|
|
|
2023-09-27 21:29:33 +08:00
|
|
|
kdebug("mmap[%d] <file> sz=%llx pr=%x fl=%x of=%llx --> %08lx",
|
|
|
|
loop, (unsigned long long) phdr->p_memsz + disp,
|
|
|
|
prot, flags, (unsigned long long) phdr->p_offset - disp,
|
|
|
|
maddr);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
if (IS_ERR_VALUE(maddr))
|
2005-04-17 06:20:36 +08:00
|
|
|
return (int) maddr;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) ==
|
|
|
|
ELF_FDPIC_FLAG_CONTIGUOUS)
|
2005-04-17 06:20:36 +08:00
|
|
|
load_addr += PAGE_ALIGN(phdr->p_memsz + disp);
|
|
|
|
|
|
|
|
seg->addr = maddr + disp;
|
|
|
|
seg->p_vaddr = phdr->p_vaddr;
|
|
|
|
seg->p_memsz = phdr->p_memsz;
|
|
|
|
|
|
|
|
/* map the ELF header address if in this segment */
|
|
|
|
if (phdr->p_offset == 0)
|
|
|
|
params->elfhdr_addr = seg->addr;
|
|
|
|
|
2006-07-10 19:44:53 +08:00
|
|
|
/* clear the bit between beginning of mapping and beginning of
|
|
|
|
* PT_LOAD */
|
2005-04-17 06:20:36 +08:00
|
|
|
if (prot & PROT_WRITE && disp > 0) {
|
|
|
|
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
|
2010-06-01 21:10:47 +08:00
|
|
|
if (clear_user((void __user *) maddr, disp))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
maddr += disp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* clear any space allocated but not loaded
|
|
|
|
* - on uClinux we can just clear the lot
|
|
|
|
* - on MMU linux we'll get a SIGBUS beyond the last page
|
|
|
|
* extant in the file
|
|
|
|
*/
|
|
|
|
excess = phdr->p_memsz - phdr->p_filesz;
|
|
|
|
excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK);
|
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
if (excess > excess1) {
|
|
|
|
unsigned long xaddr = maddr + phdr->p_filesz + excess1;
|
|
|
|
unsigned long xmaddr;
|
|
|
|
|
|
|
|
flags |= MAP_FIXED | MAP_ANONYMOUS;
|
2012-04-21 08:13:58 +08:00
|
|
|
xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
|
2006-07-10 19:44:53 +08:00
|
|
|
prot, flags, 0);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
kdebug("mmap[%d] <anon>"
|
|
|
|
" ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
|
2006-07-10 19:44:53 +08:00
|
|
|
loop, xaddr, excess - excess1, prot, flags,
|
|
|
|
xmaddr);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (xmaddr != xaddr)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prot & PROT_WRITE && excess1 > 0) {
|
|
|
|
kdebug("clear[%d] ad=%lx sz=%lx",
|
|
|
|
loop, maddr + phdr->p_filesz, excess1);
|
2010-06-01 21:10:47 +08:00
|
|
|
if (clear_user((void __user *) maddr + phdr->p_filesz,
|
|
|
|
excess1))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
if (excess > 0) {
|
2023-09-27 21:29:33 +08:00
|
|
|
kdebug("clear[%d] ad=%llx sz=%lx", loop,
|
|
|
|
(unsigned long long) maddr + phdr->p_filesz,
|
|
|
|
excess);
|
2010-06-01 21:10:47 +08:00
|
|
|
if (clear_user((void *) maddr + phdr->p_filesz, excess))
|
|
|
|
return -EFAULT;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (mm) {
|
|
|
|
if (phdr->p_flags & PF_X) {
|
2007-03-23 15:10:00 +08:00
|
|
|
if (!mm->start_code) {
|
|
|
|
mm->start_code = maddr;
|
|
|
|
mm->end_code = maddr + phdr->p_memsz;
|
|
|
|
}
|
2006-07-10 19:44:53 +08:00
|
|
|
} else if (!mm->start_data) {
|
2005-04-17 06:20:36 +08:00
|
|
|
mm->start_data = maddr;
|
|
|
|
mm->end_data = maddr + phdr->p_memsz;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
seg++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2006-07-10 19:44:53 +08:00
|
|
|
}
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* ELF-FDPIC core dumper
|
|
|
|
*
|
|
|
|
* Modelled on fs/exec.c:aout_core_dump()
|
|
|
|
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
|
|
|
|
*
|
|
|
|
* Modelled on fs/binfmt_elf.c core dumper
|
|
|
|
*/
|
2009-12-16 08:47:37 +08:00
|
|
|
#ifdef CONFIG_ELF_CORE
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2020-06-14 21:52:06 +08:00
|
|
|
struct elf_prstatus_fdpic
|
|
|
|
{
|
2020-06-13 12:08:44 +08:00
|
|
|
struct elf_prstatus_common common;
|
2020-06-14 21:52:06 +08:00
|
|
|
elf_gregset_t pr_reg; /* GP registers */
|
|
|
|
/* When using FDPIC, the loadmap addresses need to be communicated
|
|
|
|
* to GDB in order for GDB to do the necessary relocations. The
|
|
|
|
* fields (below) used to communicate this information are placed
|
|
|
|
* immediately after ``pr_reg'', so that the loadmap addresses may
|
|
|
|
* be viewed as part of the register set if so desired.
|
|
|
|
*/
|
|
|
|
unsigned long pr_exec_fdpic_loadmap;
|
|
|
|
unsigned long pr_interp_fdpic_loadmap;
|
|
|
|
int pr_fpvalid; /* True if math co-processor being used. */
|
|
|
|
};
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
/* An ELF note in memory */
|
|
|
|
struct memelfnote
|
|
|
|
{
|
|
|
|
const char *name;
|
|
|
|
int type;
|
|
|
|
unsigned int datasz;
|
|
|
|
void *data;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int notesize(struct memelfnote *en)
|
|
|
|
{
|
|
|
|
int sz;
|
|
|
|
|
|
|
|
sz = sizeof(struct elf_note);
|
|
|
|
sz += roundup(strlen(en->name) + 1, 4);
|
|
|
|
sz += roundup(en->datasz, 4);
|
|
|
|
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* #define DEBUG */
|
|
|
|
|
2013-10-06 06:58:47 +08:00
|
|
|
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
|
coredump: unify dump_seek() implementations for each binfmt_*.c
The current ELF dumper can produce broken corefiles if program headers
exceed 65535. In particular, the program in 64-bit environment often
demands more than 65535 mmaps. If you google max_map_count, then you can
find many users facing this problem.
Solaris has already dealt with this issue, and other OSes have also
adopted the same method as in Solaris. Currently, Sun's document and AMD
64 ABI include the description for the extension, where they call the
extension Extended Numbering. See Reference for further information.
I believe that linux kernel should adopt the same way as they did, so I've
written this patch.
I am also preparing for patches of GDB and binutils.
How to fix
==========
In new dumping process, there are two cases according to weather or
not the number of program headers is equal to or more than 65535.
- if less than 65535, the produced corefile format is exactly the same
as the ordinary one.
- if equal to or more than 65535, then e_phnum field is set to newly
introduced constant PN_XNUM(0xffff) and the actual number of program
headers is set to sh_info field of the section header at index 0.
Compatibility Concern
=====================
* As already mentioned in Summary, Sun and AMD64 has already adopted
this. See Reference.
* There are four combinations according to whether kernel and userland
tools are respectively modified or not. The next table summarizes
shortly for each combination.
---------------------------------------------
Original Kernel | Modified Kernel
---------------------------------------------
< 65535 | >= 65535 | < 65535 | >= 65535
-------------------------------------------------------------
Original Tools | OK | broken | OK | broken (#)
-------------------------------------------------------------
Modified Tools | OK | broken | OK | OK
-------------------------------------------------------------
Note that there is no case that `OK' changes to `broken'.
(#) Although this case remains broken, O-M behaves better than
O-O. That is, while in O-O case e_phnum field would be extremely
small due to integer overflow, in O-M case it is guaranteed to be at
least 65535 by being set to PN_XNUM(0xFFFF), much closer to the
actual correct value than the O-O case.
Test Program
============
Here is a test program mkmmaps.c that is useful to produce the
corefile with many mmaps. To use this, please take the following
steps:
$ ulimit -c unlimited
$ sysctl vm.max_map_count=70000 # default 65530 is too small
$ sysctl fs.file-max=70000
$ mkmmaps 65535
Then, the program will abort and a corefile will be generated.
If failed, there are two cases according to the error message
displayed.
* ``out of memory'' means vm.max_map_count is still smaller
* ``too many open files'' means fs.file-max is still smaller
So, please change it to a larger value, and then retry it.
mkmmaps.c
==
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char **argv)
{
int maps_num;
if (argc < 2) {
fprintf(stderr, "mkmmaps [number of maps to be created]\n");
exit(1);
}
if (sscanf(argv[1], "%d", &maps_num) == EOF) {
perror("sscanf");
exit(2);
}
if (maps_num < 0) {
fprintf(stderr, "%d is invalid\n", maps_num);
exit(3);
}
for (; maps_num > 0; --maps_num) {
if (MAP_FAILED == mmap((void *)NULL, (size_t) 1, PROT_READ,
MAP_SHARED | MAP_ANONYMOUS, (int) -1,
(off_t) NULL)) {
perror("mmap");
exit(4);
}
}
abort();
{
char buffer[128];
sprintf(buffer, "wc -l /proc/%u/maps", getpid());
system(buffer);
}
return 0;
}
Tested on i386, ia64 and um/sys-i386.
Built on sh4 (which covers fs/binfmt_elf_fdpic.c)
References
==========
- Sun microsystems: Linker and Libraries.
Part No: 817-1984-17, September 2008.
URL: http://docs.sun.com/app/docs/doc/817-1984
- System V ABI AMD64 Architecture Processor Supplement
Draft Version 0.99., May 11, 2009.
URL: http://www.x86-64.org/
This patch:
There are three different definitions for dump_seek() functions in
binfmt_aout.c, binfmt_elf.c and binfmt_elf_fdpic.c, respectively. The
only for binfmt_elf.c.
My next patch will move dump_seek() into a header file in order to share
the same implementations for dump_write() and dump_seek(). As the first
step, this patch unify these three definitions for dump_seek() by applying
the past commits that have been applied only for binfmt_elf.c.
Specifically, the modification made here is part of the following commits:
* d025c9db7f31fc0554ce7fb2dfc78d35a77f3487
* 7f14daa19ea36b200d237ad3ac5826ae25360461
This patch does not change a shape of corefiles.
Signed-off-by: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-03-06 05:44:05 +08:00
|
|
|
{
|
|
|
|
struct elf_note en;
|
2006-07-10 19:44:55 +08:00
|
|
|
en.n_namesz = strlen(men->name) + 1;
|
|
|
|
en.n_descsz = men->datasz;
|
|
|
|
en.n_type = men->type;
|
|
|
|
|
2013-10-06 06:58:47 +08:00
|
|
|
return dump_emit(cprm, &en, sizeof(en)) &&
|
2013-10-08 23:05:01 +08:00
|
|
|
dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
|
|
|
|
dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
|
|
|
|
{
|
|
|
|
memcpy(elf->e_ident, ELFMAG, SELFMAG);
|
|
|
|
elf->e_ident[EI_CLASS] = ELF_CLASS;
|
|
|
|
elf->e_ident[EI_DATA] = ELF_DATA;
|
|
|
|
elf->e_ident[EI_VERSION] = EV_CURRENT;
|
|
|
|
elf->e_ident[EI_OSABI] = ELF_OSABI;
|
|
|
|
memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
|
|
|
|
|
|
|
|
elf->e_type = ET_CORE;
|
|
|
|
elf->e_machine = ELF_ARCH;
|
|
|
|
elf->e_version = EV_CURRENT;
|
|
|
|
elf->e_entry = 0;
|
|
|
|
elf->e_phoff = sizeof(struct elfhdr);
|
|
|
|
elf->e_shoff = 0;
|
|
|
|
elf->e_flags = ELF_FDPIC_CORE_EFLAGS;
|
|
|
|
elf->e_ehsize = sizeof(struct elfhdr);
|
|
|
|
elf->e_phentsize = sizeof(struct elf_phdr);
|
|
|
|
elf->e_phnum = segs;
|
|
|
|
elf->e_shentsize = 0;
|
|
|
|
elf->e_shnum = 0;
|
|
|
|
elf->e_shstrndx = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
|
|
|
|
{
|
|
|
|
phdr->p_type = PT_NOTE;
|
|
|
|
phdr->p_offset = offset;
|
|
|
|
phdr->p_vaddr = 0;
|
|
|
|
phdr->p_paddr = 0;
|
|
|
|
phdr->p_filesz = sz;
|
|
|
|
phdr->p_memsz = 0;
|
|
|
|
phdr->p_flags = 0;
|
2023-05-12 10:25:28 +08:00
|
|
|
phdr->p_align = 4;
|
2006-07-10 19:44:55 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fill_note(struct memelfnote *note, const char *name, int type,
|
|
|
|
unsigned int sz, void *data)
|
|
|
|
{
|
|
|
|
note->name = name;
|
|
|
|
note->type = type;
|
|
|
|
note->datasz = sz;
|
|
|
|
note->data = data;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fill up all the fields in prstatus from the given task struct, except
|
tree-wide: Assorted spelling fixes
In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2010-02-03 08:01:28 +08:00
|
|
|
* registers which need to be filled up separately.
|
2006-07-10 19:44:55 +08:00
|
|
|
*/
|
2020-06-13 12:08:44 +08:00
|
|
|
static void fill_prstatus(struct elf_prstatus_common *prstatus,
|
2006-07-10 19:44:55 +08:00
|
|
|
struct task_struct *p, long signr)
|
|
|
|
{
|
|
|
|
prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
|
|
|
|
prstatus->pr_sigpend = p->pending.signal.sig[0];
|
|
|
|
prstatus->pr_sighold = p->blocked.sig[0];
|
2009-06-18 07:27:38 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
|
|
|
|
rcu_read_unlock();
|
2007-10-19 14:40:14 +08:00
|
|
|
prstatus->pr_pid = task_pid_vnr(p);
|
|
|
|
prstatus->pr_pgrp = task_pgrp_vnr(p);
|
|
|
|
prstatus->pr_sid = task_session_vnr(p);
|
2006-07-10 19:44:55 +08:00
|
|
|
if (thread_group_leader(p)) {
|
2017-01-31 11:09:27 +08:00
|
|
|
struct task_cputime cputime;
|
2008-10-21 11:07:40 +08:00
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
/*
|
2008-10-21 11:07:40 +08:00
|
|
|
* This is the record for the group leader. It shows the
|
|
|
|
* group-wide total, not its individual thread total.
|
2006-07-10 19:44:55 +08:00
|
|
|
*/
|
2017-01-31 11:09:27 +08:00
|
|
|
thread_group_cputime(p, &cputime);
|
2017-11-23 20:46:33 +08:00
|
|
|
prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
|
|
|
|
prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
|
2006-07-10 19:44:55 +08:00
|
|
|
} else {
|
2017-01-31 11:09:27 +08:00
|
|
|
u64 utime, stime;
|
2012-11-13 21:20:55 +08:00
|
|
|
|
2017-01-31 11:09:27 +08:00
|
|
|
task_cputime(p, &utime, &stime);
|
2017-11-23 20:46:33 +08:00
|
|
|
prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
|
|
|
|
prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
2017-11-23 20:46:33 +08:00
|
|
|
prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
|
|
|
|
prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
|
|
|
|
struct mm_struct *mm)
|
|
|
|
{
|
2008-11-14 07:39:19 +08:00
|
|
|
const struct cred *cred;
|
2006-07-10 19:44:55 +08:00
|
|
|
unsigned int i, len;
|
2021-06-11 16:28:17 +08:00
|
|
|
unsigned int state;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
/* first copy the parameters from user space */
|
|
|
|
memset(psinfo, 0, sizeof(struct elf_prpsinfo));
|
|
|
|
|
|
|
|
len = mm->arg_end - mm->arg_start;
|
|
|
|
if (len >= ELF_PRARGSZ)
|
|
|
|
len = ELF_PRARGSZ - 1;
|
|
|
|
if (copy_from_user(&psinfo->pr_psargs,
|
|
|
|
(const char __user *) mm->arg_start, len))
|
|
|
|
return -EFAULT;
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
if (psinfo->pr_psargs[i] == 0)
|
|
|
|
psinfo->pr_psargs[i] = ' ';
|
|
|
|
psinfo->pr_psargs[len] = 0;
|
|
|
|
|
2009-06-18 07:27:38 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
|
|
|
|
rcu_read_unlock();
|
2007-10-19 14:40:14 +08:00
|
|
|
psinfo->pr_pid = task_pid_vnr(p);
|
|
|
|
psinfo->pr_pgrp = task_pgrp_vnr(p);
|
|
|
|
psinfo->pr_sid = task_session_vnr(p);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2021-06-11 16:28:17 +08:00
|
|
|
state = READ_ONCE(p->__state);
|
|
|
|
i = state ? ffz(~state) + 1 : 0;
|
2006-07-10 19:44:55 +08:00
|
|
|
psinfo->pr_state = i;
|
|
|
|
psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
|
|
|
|
psinfo->pr_zomb = psinfo->pr_sname == 'Z';
|
|
|
|
psinfo->pr_nice = task_nice(p);
|
|
|
|
psinfo->pr_flag = p->flags;
|
2008-11-14 07:39:19 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
cred = __task_cred(p);
|
2012-02-08 10:36:10 +08:00
|
|
|
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
|
|
|
|
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
|
2008-11-14 07:39:19 +08:00
|
|
|
rcu_read_unlock();
|
2024-03-22 04:04:08 +08:00
|
|
|
get_task_comm(psinfo->pr_fname, p);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Here is the structure in which status of each thread is captured. */
|
|
|
|
struct elf_thread_status
|
|
|
|
{
|
2020-06-15 22:19:31 +08:00
|
|
|
struct elf_thread_status *next;
|
2020-06-14 21:52:06 +08:00
|
|
|
struct elf_prstatus_fdpic prstatus; /* NT_PRSTATUS */
|
2006-07-10 19:44:55 +08:00
|
|
|
elf_fpregset_t fpu; /* NT_PRFPREG */
|
2020-06-15 23:22:55 +08:00
|
|
|
struct memelfnote notes[2];
|
2006-07-10 19:44:55 +08:00
|
|
|
int num_notes;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In order to add the specific thread information for the elf file format,
|
|
|
|
* we need to keep a linked list of every thread's pr_status and then create
|
|
|
|
* a single section for them in the final core file.
|
|
|
|
*/
|
2020-06-15 22:41:08 +08:00
|
|
|
static struct elf_thread_status *elf_dump_thread_status(long signr, struct task_struct *p, int *sz)
|
2006-07-10 19:44:55 +08:00
|
|
|
{
|
2020-06-15 23:22:55 +08:00
|
|
|
const struct user_regset_view *view = task_user_regset_view(p);
|
2020-06-15 22:41:08 +08:00
|
|
|
struct elf_thread_status *t;
|
2020-06-15 23:22:55 +08:00
|
|
|
int i, ret;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2020-06-15 22:41:08 +08:00
|
|
|
t = kzalloc(sizeof(struct elf_thread_status), GFP_KERNEL);
|
|
|
|
if (!t)
|
|
|
|
return t;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2020-06-13 12:08:44 +08:00
|
|
|
fill_prstatus(&t->prstatus.common, p, signr);
|
|
|
|
t->prstatus.pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap;
|
|
|
|
t->prstatus.pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap;
|
2020-06-15 23:22:55 +08:00
|
|
|
regset_get(p, &view->regsets[0],
|
|
|
|
sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
|
|
|
|
&t->prstatus);
|
|
|
|
t->num_notes++;
|
2020-06-15 22:41:08 +08:00
|
|
|
*sz += notesize(&t->notes[0]);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2020-06-15 23:22:55 +08:00
|
|
|
for (i = 1; i < view->n; ++i) {
|
|
|
|
const struct user_regset *regset = &view->regsets[i];
|
|
|
|
if (regset->core_note_type != NT_PRFPREG)
|
|
|
|
continue;
|
|
|
|
if (regset->active && regset->active(p, regset) <= 0)
|
|
|
|
continue;
|
|
|
|
ret = regset_get(p, regset, sizeof(t->fpu), &t->fpu);
|
|
|
|
if (ret >= 0)
|
|
|
|
t->prstatus.pr_fpvalid = 1;
|
|
|
|
break;
|
|
|
|
}
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
if (t->prstatus.pr_fpvalid) {
|
|
|
|
fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
|
|
|
|
&t->fpu);
|
|
|
|
t->num_notes++;
|
2020-06-15 22:41:08 +08:00
|
|
|
*sz += notesize(&t->notes[1]);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
2020-06-15 22:41:08 +08:00
|
|
|
return t;
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
2010-03-06 05:44:10 +08:00
|
|
|
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
|
|
|
|
elf_addr_t e_shoff, int segs)
|
|
|
|
{
|
|
|
|
elf->e_shoff = e_shoff;
|
|
|
|
elf->e_shentsize = sizeof(*shdr4extnum);
|
|
|
|
elf->e_shnum = 1;
|
|
|
|
elf->e_shstrndx = SHN_UNDEF;
|
|
|
|
|
|
|
|
memset(shdr4extnum, 0, sizeof(*shdr4extnum));
|
|
|
|
|
|
|
|
shdr4extnum->sh_type = SHT_NULL;
|
|
|
|
shdr4extnum->sh_size = elf->e_shnum;
|
|
|
|
shdr4extnum->sh_link = elf->e_shstrndx;
|
|
|
|
shdr4extnum->sh_info = segs;
|
|
|
|
}
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
/*
|
|
|
|
* dump the segments for an MMU process
|
|
|
|
*/
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
|
|
|
|
struct core_vma_metadata *vma_meta,
|
|
|
|
int vma_count)
|
2006-07-10 19:44:55 +08:00
|
|
|
{
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
int i;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
for (i = 0; i < vma_count; i++) {
|
|
|
|
struct core_vma_metadata *meta = vma_meta + i;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
if (!dump_user_range(cprm, meta->start, meta->dump_size))
|
2020-10-16 11:12:46 +08:00
|
|
|
return false;
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
2013-10-06 06:58:47 +08:00
|
|
|
return true;
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Actual dumper
|
|
|
|
*
|
|
|
|
* This is a two-pass process; first we find the offsets of the bits,
|
|
|
|
* and then they are actually written out. If we run out of core limit
|
|
|
|
* we just truncate.
|
|
|
|
*/
|
2009-12-18 07:27:16 +08:00
|
|
|
static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
2006-07-10 19:44:55 +08:00
|
|
|
{
|
|
|
|
int has_dumped = 0;
|
2022-03-09 02:55:29 +08:00
|
|
|
int segs;
|
2006-07-10 19:44:55 +08:00
|
|
|
int i;
|
|
|
|
struct elfhdr *elf = NULL;
|
2013-10-08 21:26:08 +08:00
|
|
|
loff_t offset = 0, dataoff;
|
2020-06-15 23:03:28 +08:00
|
|
|
struct memelfnote psinfo_note, auxv_note;
|
2006-07-10 19:44:55 +08:00
|
|
|
struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
|
2020-06-15 22:19:31 +08:00
|
|
|
struct elf_thread_status *thread_list = NULL;
|
2006-07-10 19:44:55 +08:00
|
|
|
int thread_status_size = 0;
|
|
|
|
elf_addr_t *auxv;
|
2010-03-06 05:44:09 +08:00
|
|
|
struct elf_phdr *phdr4note = NULL;
|
2010-03-06 05:44:10 +08:00
|
|
|
struct elf_shdr *shdr4extnum = NULL;
|
|
|
|
Elf_Half e_phnum;
|
|
|
|
elf_addr_t e_shoff;
|
2013-10-14 19:39:56 +08:00
|
|
|
struct core_thread *ct;
|
|
|
|
struct elf_thread_status *tmp;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
/* alloc memory for large data structures: too large to be on stack */
|
|
|
|
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
|
|
|
|
if (!elf)
|
2020-05-05 18:12:56 +08:00
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
|
|
|
|
if (!psinfo)
|
2020-05-05 18:12:56 +08:00
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2021-09-23 00:24:02 +08:00
|
|
|
for (ct = current->signal->core_state->dumper.next;
|
2013-10-14 19:39:56 +08:00
|
|
|
ct; ct = ct->next) {
|
2020-06-15 22:41:08 +08:00
|
|
|
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
|
|
|
|
ct->task, &thread_status_size);
|
2013-10-14 19:39:56 +08:00
|
|
|
if (!tmp)
|
2020-05-05 18:12:56 +08:00
|
|
|
goto end_coredump;
|
2008-07-25 16:47:45 +08:00
|
|
|
|
2020-06-15 22:19:31 +08:00
|
|
|
tmp->next = thread_list;
|
|
|
|
thread_list = tmp;
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* now collect the dump for the current */
|
2020-06-15 23:03:28 +08:00
|
|
|
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
|
|
|
|
current, &thread_status_size);
|
|
|
|
if (!tmp)
|
|
|
|
goto end_coredump;
|
|
|
|
tmp->next = thread_list;
|
|
|
|
thread_list = tmp;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2022-12-23 02:12:50 +08:00
|
|
|
segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2010-03-06 05:44:10 +08:00
|
|
|
/* for notes section */
|
|
|
|
segs++;
|
|
|
|
|
|
|
|
/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
|
|
|
|
* this, kernel supports extended numbering. Have a look at
|
|
|
|
* include/linux/elf.h for further information. */
|
|
|
|
e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
/* Set up header */
|
2010-03-06 05:44:10 +08:00
|
|
|
fill_elf_fdpic_header(elf, e_phnum);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
has_dumped = 1;
|
|
|
|
/*
|
|
|
|
* Set up the notes in similar form to SVR4 core dumps made
|
|
|
|
* with info from their /proc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
fill_psinfo(psinfo, current->group_leader, current->mm);
|
2020-06-15 23:03:28 +08:00
|
|
|
fill_note(&psinfo_note, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
|
|
|
|
thread_status_size += notesize(&psinfo_note);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
auxv = (elf_addr_t *) current->mm->saved_auxv;
|
|
|
|
i = 0;
|
|
|
|
do
|
|
|
|
i += 2;
|
|
|
|
while (auxv[i - 2] != AT_NULL);
|
2020-06-15 23:03:28 +08:00
|
|
|
fill_note(&auxv_note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
|
|
|
|
thread_status_size += notesize(&auxv_note);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2023-02-28 20:14:17 +08:00
|
|
|
offset = sizeof(*elf); /* ELF header */
|
2010-03-06 05:44:10 +08:00
|
|
|
offset += segs * sizeof(struct elf_phdr); /* Program headers */
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
/* Write notes phdr entry */
|
2020-06-15 23:03:28 +08:00
|
|
|
phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
|
|
|
|
if (!phdr4note)
|
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2020-06-15 23:03:28 +08:00
|
|
|
fill_elf_note_phdr(phdr4note, thread_status_size, offset);
|
|
|
|
offset += thread_status_size;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
/* Page-align dumped data */
|
|
|
|
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
|
|
|
|
|
2022-03-09 02:55:29 +08:00
|
|
|
offset += cprm->vma_data_size;
|
2022-12-23 02:12:50 +08:00
|
|
|
offset += elf_core_extra_data_size(cprm);
|
2010-03-06 05:44:10 +08:00
|
|
|
e_shoff = offset;
|
|
|
|
|
|
|
|
if (e_phnum == PN_XNUM) {
|
|
|
|
shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
|
|
|
|
if (!shdr4extnum)
|
|
|
|
goto end_coredump;
|
|
|
|
fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
|
|
|
|
}
|
|
|
|
|
|
|
|
offset = dataoff;
|
|
|
|
|
2013-10-06 06:58:47 +08:00
|
|
|
if (!dump_emit(cprm, elf, sizeof(*elf)))
|
2010-03-06 05:44:09 +08:00
|
|
|
goto end_coredump;
|
|
|
|
|
2013-10-06 06:58:47 +08:00
|
|
|
if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
|
2010-03-06 05:44:09 +08:00
|
|
|
goto end_coredump;
|
|
|
|
|
2006-07-10 19:44:55 +08:00
|
|
|
/* write program headers for segments dump */
|
2022-03-09 02:55:29 +08:00
|
|
|
for (i = 0; i < cprm->vma_count; i++) {
|
|
|
|
struct core_vma_metadata *meta = cprm->vma_meta + i;
|
2006-07-10 19:44:55 +08:00
|
|
|
struct elf_phdr phdr;
|
|
|
|
size_t sz;
|
|
|
|
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
sz = meta->end - meta->start;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
|
|
|
phdr.p_type = PT_LOAD;
|
|
|
|
phdr.p_offset = offset;
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
phdr.p_vaddr = meta->start;
|
2006-07-10 19:44:55 +08:00
|
|
|
phdr.p_paddr = 0;
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
phdr.p_filesz = meta->dump_size;
|
2006-07-10 19:44:55 +08:00
|
|
|
phdr.p_memsz = sz;
|
|
|
|
offset += phdr.p_filesz;
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
phdr.p_flags = 0;
|
|
|
|
if (meta->flags & VM_READ)
|
|
|
|
phdr.p_flags |= PF_R;
|
|
|
|
if (meta->flags & VM_WRITE)
|
2006-07-10 19:44:55 +08:00
|
|
|
phdr.p_flags |= PF_W;
|
binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
In both binfmt_elf and binfmt_elf_fdpic, use a new helper
dump_vma_snapshot() to take a snapshot of the VMA list (including the gate
VMA, if we have one) while protected by the mmap_lock, and then use that
snapshot instead of walking the VMA list without locking.
An alternative approach would be to keep the mmap_lock held across the
entire core dumping operation; however, keeping the mmap_lock locked while
we may be blocked for an unbounded amount of time (e.g. because we're
dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock
blocks things like the ->release handler of userfaultfd, and we don't
really want critical system daemons to grind to a halt just because
someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or
something like that.
Since both the normal ELF code and the FDPIC ELF code need this
functionality (and if any other binfmt wants to add coredump support in
the future, they'd probably need it, too), implement this with a common
helper in fs/coredump.c.
A downside of this approach is that we now need a bigger amount of kernel
memory per userspace VMA in the normal ELF case, and that we need O(n)
kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't
be terribly bad.
There currently is a data race between stack expansion and anything that
reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to
mitigate that for core dumping, take the mmap_lock in write mode when
taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in
read mode, we could end up with a corrupted core dump if someone does
get_user_pages_remote() concurrently. Not really a major problem, but
taking the mmap_lock either way works here, so we might as well avoid the
issue.) (This doesn't do anything about the existing data races with stack
expansion in other mm code.)
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-16 11:12:54 +08:00
|
|
|
if (meta->flags & VM_EXEC)
|
2006-07-10 19:44:55 +08:00
|
|
|
phdr.p_flags |= PF_X;
|
|
|
|
phdr.p_align = ELF_EXEC_PAGESIZE;
|
|
|
|
|
2013-10-06 06:58:47 +08:00
|
|
|
if (!dump_emit(cprm, &phdr, sizeof(phdr)))
|
2010-03-06 05:44:06 +08:00
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
2013-10-06 05:22:57 +08:00
|
|
|
if (!elf_core_write_extra_phdrs(cprm, offset))
|
2010-03-06 05:44:07 +08:00
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2022-10-18 15:14:20 +08:00
|
|
|
/* write out the notes section */
|
2020-06-15 23:03:28 +08:00
|
|
|
if (!writenote(thread_list->notes, cprm))
|
|
|
|
goto end_coredump;
|
|
|
|
if (!writenote(&psinfo_note, cprm))
|
|
|
|
goto end_coredump;
|
|
|
|
if (!writenote(&auxv_note, cprm))
|
|
|
|
goto end_coredump;
|
|
|
|
for (i = 1; i < thread_list->num_notes; i++)
|
|
|
|
if (!writenote(thread_list->notes + i, cprm))
|
2006-07-10 19:44:55 +08:00
|
|
|
goto end_coredump;
|
|
|
|
|
|
|
|
/* write out the thread status notes section */
|
2020-06-15 23:03:28 +08:00
|
|
|
for (tmp = thread_list->next; tmp; tmp = tmp->next) {
|
2006-07-10 19:44:55 +08:00
|
|
|
for (i = 0; i < tmp->num_notes; i++)
|
2013-10-06 06:58:47 +08:00
|
|
|
if (!writenote(&tmp->notes[i], cprm))
|
2006-07-10 19:44:55 +08:00
|
|
|
goto end_coredump;
|
|
|
|
}
|
|
|
|
|
2020-03-08 21:16:37 +08:00
|
|
|
dump_skip_to(cprm, dataoff);
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2022-03-09 02:55:29 +08:00
|
|
|
if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count))
|
2006-07-10 19:44:55 +08:00
|
|
|
goto end_coredump;
|
|
|
|
|
2013-10-06 05:50:15 +08:00
|
|
|
if (!elf_core_write_extra_data(cprm))
|
2010-03-06 05:44:07 +08:00
|
|
|
goto end_coredump;
|
2006-07-10 19:44:55 +08:00
|
|
|
|
2010-03-06 05:44:10 +08:00
|
|
|
if (e_phnum == PN_XNUM) {
|
2013-10-06 06:58:47 +08:00
|
|
|
if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
|
2010-03-06 05:44:10 +08:00
|
|
|
goto end_coredump;
|
|
|
|
}
|
|
|
|
|
2010-01-04 14:42:14 +08:00
|
|
|
if (cprm->file->f_pos != offset) {
|
2006-07-10 19:44:55 +08:00
|
|
|
/* Sanity check */
|
|
|
|
printk(KERN_WARNING
|
|
|
|
"elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
|
2010-01-04 14:42:14 +08:00
|
|
|
cprm->file->f_pos, offset);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
end_coredump:
|
2020-06-15 22:19:31 +08:00
|
|
|
while (thread_list) {
|
|
|
|
tmp = thread_list;
|
|
|
|
thread_list = thread_list->next;
|
|
|
|
kfree(tmp);
|
2006-07-10 19:44:55 +08:00
|
|
|
}
|
2010-03-06 05:44:09 +08:00
|
|
|
kfree(phdr4note);
|
2006-07-10 19:44:55 +08:00
|
|
|
kfree(elf);
|
|
|
|
kfree(psinfo);
|
2011-07-06 19:26:05 +08:00
|
|
|
kfree(shdr4extnum);
|
2006-07-10 19:44:55 +08:00
|
|
|
return has_dumped;
|
|
|
|
}
|
|
|
|
|
2009-12-16 08:47:37 +08:00
|
|
|
#endif /* CONFIG_ELF_CORE */
|