This tree cleans up and simplifies the x86 KASLR code, and

also fixes some corner case bugs.
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl+EjbwRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1g5ihAAw1B4ZV+yG6BKMIx8oC+LSefSX0lk6nv0
 pmPWIdvmlL1FHlyvALymU2UCneA7KYkGjdz6LYtXlvdGTpVj1M7mJAFBp8ugJj2k
 oOelkohTsTHOU/Oae4+OX8E8Kt5PsFS21wSoTo+WYI2pgl1iGzVPPCreU+uMB3lP
 IaenOvBlNeXUy0S1tDYLFQT3wtS+lC2Yu6mG5AklDxGvAxpO73U9KJnRwpx34aCL
 Z24fW5gZkZg5KQa1ULjZLMUoCNEz24QP/V5PMmgbD3QRkxTeNliaGPvbBV5c2fJd
 WOaI4QUEah2wTtRgXv3z1SMbhAFnIoo1fAZKs1DEfT+HUO+Aw6YDpH58TdaPCs4/
 ES3Tk9lcRUITAeilbO4pGPUEq+9FPHZ+hZaevQTDcjHmE4VPG984UqFzm2Pc4NEb
 gZ6R0zKU9I7w6GFyXn3KeaPwf79CVGuJ4Zd1Is86ab2ft7V3m4cyHT7BKBkF+skD
 MQEAhroE7WL/+E1mvK/zej9I9PTbHtk50GdmKTzcnrNdFLehGz6hyMM/PnBKE2j5
 Gsw+x/DjCXQSpwVTkJCBLf3BqMpbi7C5vzwbIPYcKuE3Ggo0f1n9rUUJwCNybo21
 XhSWq8GFX35+5nBLfznXRbaU1uyvWwWvheWwR56LbTHgdPUlgVr/aa/KQ5bliz0a
 JCUUcDnUZjw=
 =+VGg
 -----END PGP SIGNATURE-----

Merge tag 'x86-kaslr-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 kaslr updates from Ingo Molnar:
 "This cleans up and simplifies the x86 KASLR code, and also fixes some
  corner case bugs"

* tag 'x86-kaslr-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits)
  x86/kaslr: Replace strlen() with strnlen()
  x86/kaslr: Add a check that the random address is in range
  x86/kaslr: Make local variables 64-bit
  x86/kaslr: Replace 'unsigned long long' with 'u64'
  x86/kaslr: Make minimum/image_size 'unsigned long'
  x86/kaslr: Small cleanup of find_random_phys_addr()
  x86/kaslr: Drop unnecessary alignment in find_random_virt_addr()
  x86/kaslr: Drop redundant check in store_slot_info()
  x86/kaslr: Make the type of number of slots/slot areas consistent
  x86/kaslr: Drop test for command-line parameters before parsing
  x86/kaslr: Simplify process_gb_huge_pages()
  x86/kaslr: Short-circuit gb_huge_pages on x86-32
  x86/kaslr: Fix off-by-one error in process_gb_huge_pages()
  x86/kaslr: Drop some redundant checks from __process_mem_region()
  x86/kaslr: Drop redundant variable in __process_mem_region()
  x86/kaslr: Eliminate 'start_orig' local variable from __process_mem_region()
  x86/kaslr: Drop redundant cur_entry from __process_mem_region()
  x86/kaslr: Fix off-by-one error in __process_mem_region()
  x86/kaslr: Initialize mem_limit to the real maximum address
  x86/kaslr: Fix process_efi_entries comment
  ...
This commit is contained in:
Linus Torvalds 2020-10-12 14:42:19 -07:00
commit b85cac5745
2 changed files with 107 additions and 135 deletions

View File

@ -36,6 +36,10 @@
#define STATIC
#include <linux/decompress/mm.h>
#define _SETUP
#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
#undef _SETUP
#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
@ -87,8 +91,11 @@ static unsigned long get_boot_seed(void)
static bool memmap_too_large;
/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
static unsigned long long mem_limit = ULLONG_MAX;
/*
* Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
* It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
*/
static u64 mem_limit;
/* Number of immovable memory regions */
static int num_immovable_mem;
@ -131,8 +138,7 @@ enum parse_mode {
};
static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
enum parse_mode mode)
parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
char *oldp;
@ -162,7 +168,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
*/
*size = 0;
} else {
unsigned long long flags;
u64 flags;
/*
* efi_fake_mem=nn@ss:attr the attr specifies
@ -201,7 +207,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
unsigned long long start, size;
u64 start, size;
char *k = strchr(str, ',');
if (k)
@ -214,7 +220,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
if (start == 0) {
/* Store the specified memory limit if size > 0 */
if (size > 0)
if (size > 0 && size < mem_limit)
mem_limit = size;
continue;
@ -261,15 +267,15 @@ static void parse_gb_huge_pages(char *param, char *val)
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
size_t len = strlen((char *)args);
size_t len;
char *tmp_cmdline;
char *param, *val;
u64 mem_size;
if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
!strstr(args, "hugepages"))
if (!args)
return;
len = strnlen(args, COMMAND_LINE_SIZE-1);
tmp_cmdline = malloc(len + 1);
if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
@ -284,14 +290,12 @@ static void handle_mem_options(void)
while (*args) {
args = next_arg(args, &param, &val);
/* Stop at -- */
if (!val && strcmp(param, "--") == 0) {
warn("Only '--' specified in cmdline");
goto out;
}
if (!val && strcmp(param, "--") == 0)
break;
if (!strcmp(param, "memmap")) {
mem_avoid_memmap(PARSE_MEMMAP, val);
} else if (strstr(param, "hugepages")) {
} else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
char *p = val;
@ -300,21 +304,23 @@ static void handle_mem_options(void)
continue;
mem_size = memparse(p, &p);
if (mem_size == 0)
goto out;
break;
mem_limit = mem_size;
if (mem_size < mem_limit)
mem_limit = mem_size;
} else if (!strcmp(param, "efi_fake_mem")) {
mem_avoid_memmap(PARSE_EFI, val);
}
}
out:
free(tmp_cmdline);
return;
}
/*
* In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
* In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
* on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
*
* The mem_avoid array is used to store the ranges that need to be avoided
* when KASLR searches for an appropriate random address. We must avoid any
* regions that are unsafe to overlap with during decompression, and other
@ -392,8 +398,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
{
unsigned long init_size = boot_params->hdr.init_size;
u64 initrd_start, initrd_size;
u64 cmd_line, cmd_line_size;
char *ptr;
unsigned long cmd_line, cmd_line_size;
/*
* Avoid the region that is unsafe to overlap during
@ -414,16 +419,15 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* No need to set mapping for initrd, it will be handled in VO. */
/* Avoid kernel command line. */
cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
cmd_line |= boot_params->hdr.cmd_line_ptr;
cmd_line = get_cmd_line_ptr();
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
for (cmd_line_size = 0; ptr[cmd_line_size++];)
;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
mem_avoid[MEM_AVOID_CMDLINE].size);
if (cmd_line) {
cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
mem_avoid[MEM_AVOID_CMDLINE].size);
}
/* Avoid boot parameters. */
mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
@ -454,7 +458,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
{
int i;
struct setup_data *ptr;
unsigned long earliest = img->start + img->size;
u64 earliest = img->start + img->size;
bool is_overlapping = false;
for (i = 0; i < MEM_AVOID_MAX; i++) {
@ -499,18 +503,16 @@ static bool mem_avoid_overlap(struct mem_vector *img,
}
struct slot_area {
unsigned long addr;
int num;
u64 addr;
unsigned long num;
};
#define MAX_SLOT_AREA 100
static struct slot_area slot_areas[MAX_SLOT_AREA];
static unsigned int slot_area_index;
static unsigned long slot_max;
static unsigned long slot_area_index;
static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
struct slot_area slot_area;
@ -519,13 +521,10 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
return;
slot_area.addr = region->start;
slot_area.num = (region->size - image_size) /
CONFIG_PHYSICAL_ALIGN + 1;
slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
if (slot_area.num > 0) {
slot_areas[slot_area_index++] = slot_area;
slot_max += slot_area.num;
}
slot_areas[slot_area_index++] = slot_area;
slot_max += slot_area.num;
}
/*
@ -535,57 +534,53 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
unsigned long addr, size = 0;
u64 pud_start, pud_end;
unsigned long gb_huge_pages;
struct mem_vector tmp;
int i = 0;
if (!max_gb_huge_pages) {
if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
store_slot_info(region, image_size);
return;
}
addr = ALIGN(region->start, PUD_SIZE);
/* Did we raise the address above the passed in memory entry? */
if (addr < region->start + region->size)
size = region->size - (addr - region->start);
/* Check how many 1GB huge pages can be filtered out: */
while (size > PUD_SIZE && max_gb_huge_pages) {
size -= PUD_SIZE;
max_gb_huge_pages--;
i++;
}
/* Are there any 1GB pages in the region? */
pud_start = ALIGN(region->start, PUD_SIZE);
pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
/* No good 1GB huge pages found: */
if (!i) {
if (pud_start >= pud_end) {
store_slot_info(region, image_size);
return;
}
/*
* Skip those 'i'*1GB good huge pages, and continue checking and
* processing the remaining head or tail part of the passed region
* if available.
*/
if (addr >= region->start + image_size) {
/* Check if the head part of the region is usable. */
if (pud_start >= region->start + image_size) {
tmp.start = region->start;
tmp.size = addr - region->start;
tmp.size = pud_start - region->start;
store_slot_info(&tmp, image_size);
}
size = region->size - (addr - region->start) - i * PUD_SIZE;
if (size >= image_size) {
tmp.start = addr + i * PUD_SIZE;
tmp.size = size;
/* Skip the good 1GB pages. */
gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
if (gb_huge_pages > max_gb_huge_pages) {
pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
max_gb_huge_pages = 0;
} else {
max_gb_huge_pages -= gb_huge_pages;
}
/* Check if the tail part of the region is usable. */
if (region->start + region->size >= pud_end + image_size) {
tmp.start = pud_end;
tmp.size = region->start + region->size - pud_end;
store_slot_info(&tmp, image_size);
}
}
static unsigned long slots_fetch_random(void)
static u64 slots_fetch_random(void)
{
unsigned long slot;
int i;
unsigned int i;
/* Handle case of no slots stored. */
if (slot_max == 0)
@ -598,7 +593,7 @@ static unsigned long slots_fetch_random(void)
slot -= slot_areas[i].num;
continue;
}
return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
}
if (i == slot_area_index)
@ -611,49 +606,23 @@ static void __process_mem_region(struct mem_vector *entry,
unsigned long image_size)
{
struct mem_vector region, overlap;
unsigned long start_orig, end;
struct mem_vector cur_entry;
u64 region_end;
/* On 32-bit, ignore entries entirely above our maximum. */
if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
return;
/* Ignore entries entirely below our minimum. */
if (entry->start + entry->size < minimum)
return;
/* Ignore entries above memory limit */
end = min(entry->size + entry->start, mem_limit);
if (entry->start >= end)
return;
cur_entry.start = entry->start;
cur_entry.size = end - entry->start;
region.start = cur_entry.start;
region.size = cur_entry.size;
/* Enforce minimum and memory limit. */
region.start = max_t(u64, entry->start, minimum);
region_end = min(entry->start + entry->size, mem_limit);
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
start_orig = region.start;
/* Potentially raise address to minimum location. */
if (region.start < minimum)
region.start = minimum;
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the passed in memory entry? */
if (region.start > cur_entry.start + cur_entry.size)
if (region.start > region_end)
return;
/* Reduce size by any delta from the original address. */
region.size -= region.start - start_orig;
/* On 32-bit, reduce region size to fit within max size. */
if (IS_ENABLED(CONFIG_X86_32) &&
region.start + region.size > KERNEL_IMAGE_SIZE)
region.size = KERNEL_IMAGE_SIZE - region.start;
region.size = region_end - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
@ -666,27 +635,19 @@ static void __process_mem_region(struct mem_vector *entry,
}
/* Store beginning of region if holds at least image_size. */
if (overlap.start > region.start + image_size) {
struct mem_vector beginning;
beginning.start = region.start;
beginning.size = overlap.start - region.start;
process_gb_huge_pages(&beginning, image_size);
if (overlap.start >= region.start + image_size) {
region.size = overlap.start - region.start;
process_gb_huge_pages(&region, image_size);
}
/* Return if overlap extends to or past end of region. */
if (overlap.start + overlap.size >= region.start + region.size)
return;
/* Clip off the overlapping region and start over. */
region.size -= overlap.start - region.start + overlap.size;
region.start = overlap.start + overlap.size;
}
}
static bool process_mem_region(struct mem_vector *region,
unsigned long long minimum,
unsigned long long image_size)
unsigned long minimum,
unsigned long image_size)
{
int i;
/*
@ -709,7 +670,7 @@ static bool process_mem_region(struct mem_vector *region,
* immovable memory and @region.
*/
for (i = 0; i < num_immovable_mem; i++) {
unsigned long long start, end, entry_end, region_end;
u64 start, end, entry_end, region_end;
struct mem_vector entry;
if (!mem_overlaps(region, &immovable_mem[i]))
@ -736,8 +697,8 @@ static bool process_mem_region(struct mem_vector *region,
#ifdef CONFIG_EFI
/*
* Returns true if mirror region found (and must have been processed
* for slots adding)
* Returns true if we processed the EFI memmap, which we prefer over the E820
* table if it is available.
*/
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
@ -839,20 +800,30 @@ static void process_e820_entries(unsigned long minimum,
static unsigned long find_random_phys_addr(unsigned long minimum,
unsigned long image_size)
{
u64 phys_addr;
/* Bail out early if it's impossible to succeed. */
if (minimum + image_size > mem_limit)
return 0;
/* Check if we had too many memmaps. */
if (memmap_too_large) {
debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
return 0;
}
/* Make sure minimum is aligned. */
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
if (!process_efi_entries(minimum, image_size))
process_e820_entries(minimum, image_size);
if (process_efi_entries(minimum, image_size))
return slots_fetch_random();
phys_addr = slots_fetch_random();
process_e820_entries(minimum, image_size);
return slots_fetch_random();
/* Perform a final check to make sure the address is in range. */
if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
warn("Invalid physical address chosen!\n");
return 0;
}
return (unsigned long)phys_addr;
}
static unsigned long find_random_virt_addr(unsigned long minimum,
@ -860,18 +831,12 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
{
unsigned long slots, random_addr;
/* Make sure minimum is aligned. */
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
/* Align image_size for easy slot calculations. */
image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
/*
* There are how many CONFIG_PHYSICAL_ALIGN-sized slots
* that can hold image_size within the range of minimum to
* KERNEL_IMAGE_SIZE?
*/
slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
CONFIG_PHYSICAL_ALIGN + 1;
slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
random_addr = kaslr_get_random_long("Virtual") % slots;
@ -908,6 +873,11 @@ void choose_random_location(unsigned long input,
/* Prepare to add new identity pagetables on demand. */
initialize_identity_maps();
if (IS_ENABLED(CONFIG_X86_32))
mem_limit = KERNEL_IMAGE_SIZE;
else
mem_limit = MAXMEM;
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, *output);
@ -917,6 +887,8 @@ void choose_random_location(unsigned long input,
* location:
*/
min_addr = min(*output, 512UL << 20);
/* Make sure minimum is aligned. */
min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
/* Walk available memory entries to find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);

View File

@ -70,8 +70,8 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
struct mem_vector {
unsigned long long start;
unsigned long long size;
u64 start;
u64 size;
};
#if CONFIG_RANDOMIZE_BASE