linux/arch/x86/kernel/module.c
Song Liu ac3b432839 module: replace module_layout with module_memory
module_layout manages different types of memory (text, data, rodata, etc.)
in one allocation, which is problematic for some reasons:

1. It is hard to enable CONFIG_STRICT_MODULE_RWX.
2. It is hard to use huge pages in modules (and not break strict rwx).
3. Many archs uses module_layout for arch-specific data, but it is not
   obvious how these data are used (are they RO, RX, or RW?)

Improve the scenario by replacing 2 (or 3) module_layout per module with
up to 7 module_memory per module:

        MOD_TEXT,
        MOD_DATA,
        MOD_RODATA,
        MOD_RO_AFTER_INIT,
        MOD_INIT_TEXT,
        MOD_INIT_DATA,
        MOD_INIT_RODATA,

and allocating them separately. This adds slightly more entries to
mod_tree (from up to 3 entries per module, to up to 7 entries per
module). However, this at most adds a small constant overhead to
__module_address(), which is expected to be fast.

Various archs use module_layout for different data. These data are put
into different module_memory based on their location in module_layout.
IOW, data that used to go with text is allocated with MOD_MEM_TYPE_TEXT;
data that used to go with data is allocated with MOD_MEM_TYPE_DATA, etc.

module_memory simplifies quite some of the module code. For example,
ARCH_WANTS_MODULES_DATA_IN_VMALLOC is a lot cleaner, as it just uses a
different allocator for the data. kernel/module/strict_rwx.c is also
much cleaner with module_memory.

Signed-off-by: Song Liu <song@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
2023-03-09 12:55:15 -08:00

383 lines
9.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for x86.
Copyright (C) 2001 Rusty Russell.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/moduleloader.h>
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/jump_label.h>
#include <linux/random.h>
#include <linux/memory.h>
#include <asm/text-patching.h>
#include <asm/page.h>
#include <asm/setup.h>
#include <asm/unwind.h>
#if 0
#define DEBUGP(fmt, ...) \
printk(KERN_DEBUG fmt, ##__VA_ARGS__)
#else
#define DEBUGP(fmt, ...) \
do { \
if (0) \
printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
} while (0)
#endif
#ifdef CONFIG_RANDOMIZE_BASE
static unsigned long module_load_offset;
/* Mutex protects the module_load_offset. */
static DEFINE_MUTEX(module_kaslr_mutex);
static unsigned long int get_module_load_offset(void)
{
if (kaslr_enabled()) {
mutex_lock(&module_kaslr_mutex);
/*
* Calculate the module_load_offset the first time this
* code is called. Once calculated it stays the same until
* reboot.
*/
if (module_load_offset == 0)
module_load_offset =
get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
mutex_unlock(&module_kaslr_mutex);
}
return module_load_offset;
}
#else
static unsigned long int get_module_load_offset(void)
{
return 0;
}
#endif
void *module_alloc(unsigned long size)
{
gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
MODULES_END, gfp_mask, PAGE_KERNEL,
VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
NUMA_NO_NODE, __builtin_return_address(0));
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
return p;
}
#ifdef CONFIG_X86_32
int apply_relocate(Elf32_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
{
unsigned int i;
Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
Elf32_Sym *sym;
uint32_t *location;
DEBUGP("Applying relocate section %u to %u\n",
relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[i].r_offset;
/* This is the symbol it is referring to. Note that all
undefined symbols have been resolved. */
sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+ ELF32_R_SYM(rel[i].r_info);
switch (ELF32_R_TYPE(rel[i].r_info)) {
case R_386_32:
/* We add the value into the location given */
*location += sym->st_value;
break;
case R_386_PC32:
case R_386_PLT32:
/* Add the value, subtract its position */
*location += sym->st_value - (uint32_t)location;
break;
default:
pr_err("%s: Unknown relocation: %u\n",
me->name, ELF32_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
}
return 0;
}
#else /*X86_64*/
static int __write_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me,
void *(*write)(void *dest, const void *src, size_t len),
bool apply)
{
unsigned int i;
Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
Elf64_Sym *sym;
void *loc;
u64 val;
u64 zero = 0ULL;
DEBUGP("%s relocate section %u to %u\n",
apply ? "Applying" : "Clearing",
relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
size_t size;
/* This is where to make the change */
loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[i].r_offset;
/* This is the symbol it is referring to. Note that all
undefined symbols have been resolved. */
sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rel[i].r_info);
DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info),
sym->st_value, rel[i].r_addend, (u64)loc);
val = sym->st_value + rel[i].r_addend;
switch (ELF64_R_TYPE(rel[i].r_info)) {
case R_X86_64_NONE:
continue; /* nothing to write */
case R_X86_64_64:
size = 8;
break;
case R_X86_64_32:
if (val != *(u32 *)&val)
goto overflow;
size = 4;
break;
case R_X86_64_32S:
if ((s64)val != *(s32 *)&val)
goto overflow;
size = 4;
break;
case R_X86_64_PC32:
case R_X86_64_PLT32:
val -= (u64)loc;
size = 4;
break;
case R_X86_64_PC64:
val -= (u64)loc;
size = 8;
break;
default:
pr_err("%s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
if (apply) {
if (memcmp(loc, &zero, size)) {
pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
write(loc, &val, size);
} else {
if (memcmp(loc, &val, size)) {
pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
write(loc, &zero, size);
}
}
return 0;
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
me->name);
return -ENOEXEC;
}
static int write_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me,
bool apply)
{
int ret;
bool early = me->state == MODULE_STATE_UNFORMED;
void *(*write)(void *, const void *, size_t) = memcpy;
if (!early) {
write = text_poke;
mutex_lock(&text_mutex);
}
ret = __write_relocate_add(sechdrs, strtab, symindex, relsec, me,
write, apply);
if (!early) {
text_poke_sync();
mutex_unlock(&text_mutex);
}
return ret;
}
int apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
{
return write_relocate_add(sechdrs, strtab, symindex, relsec, me, true);
}
#ifdef CONFIG_LIVEPATCH
void clear_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
{
write_relocate_add(sechdrs, strtab, symindex, relsec, me, false);
}
#endif
#endif
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
if (!strcmp(".return_sites", secstrings + s->sh_name))
returns = s;
if (!strcmp(".call_sites", secstrings + s->sh_name))
calls = s;
if (!strcmp(".cfi_sites", secstrings + s->sh_name))
cfi = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
/*
* See alternative_instructions() for the ordering rules between the
* various patching types.
*/
if (para) {
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
if (retpolines) {
rseg = (void *)retpolines->sh_addr;
rsize = retpolines->sh_size;
}
if (cfi) {
cseg = (void *)cfi->sh_addr;
csize = cfi->sh_size;
}
apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
}
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
if (calls || para) {
struct callthunk_sites cs = {};
if (calls) {
cs.call_start = (void *)calls->sh_addr;
cs.call_end = (void *)calls->sh_addr + calls->sh_size;
}
if (para) {
cs.pv_start = (void *)para->sh_addr;
cs.pv_end = (void *)para->sh_addr + para->sh_size;
}
callthunks_patch_module_calls(&cs, me);
}
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
}
if (locks) {
void *lseg = (void *)locks->sh_addr;
void *text = me->mem[MOD_TEXT].base;
void *text_end = text + me->mem[MOD_TEXT].size;
alternatives_smp_module_add(me, me->name,
lseg, lseg + locks->sh_size,
text, text_end);
}
if (orc && orc_ip)
unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
(void *)orc->sh_addr, orc->sh_size);
return 0;
}
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
}