Merge branch 'x86/mm2' into x86/mm
x86/mm2 is testing out fine, but has developed conflicts with x86/mm due to patches in adjacent code. Merge them so we can drop x86/mm2 and have a unified branch.

Resolved Conflicts:
	arch/x86/kernel/setup.c
commit 0da3e7f526
@@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			is selected automatically. Check
			Documentation/kdump/kdump.txt for further details.

	crashkernel_low=size[KMG]
			[KNL, x86] parts under 4G.

	crashkernel=range1:size1[,range2:size2,...][@offset]
			[KNL] Same as above, but depends on the memory
			in the running system. The syntax of range is
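As an illustration of this syntax (an example command line, not text from the patch), a reservation that scales with the amount of installed memory could be requested with:

	crashkernel=512M-2G:64M,2G-:128M

i.e. reserve 64M of crash-kernel memory when the system has between 512M and 2G of RAM, and 128M when it has 2G or more.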
@@ -57,6 +57,10 @@ Protocol 2.10:	(Kernel 2.6.31) Added a protocol for relaxed alignment
Protocol 2.11:	(Kernel 3.6) Added a field for offset of EFI handover
		protocol entry point.

Protocol 2.12:	(Kernel 3.9) Added the xloadflags field and extension fields
		to struct boot_params for loading bzImage and ramdisk
		above 4G in 64bit.

**** MEMORY LAYOUT

The traditional memory map for the kernel loader, used for Image or
@@ -182,7 +186,7 @@ Offset	Proto	Name		Meaning
0230/4	2.05+	kernel_alignment	Physical addr alignment required for kernel
0234/1	2.05+	relocatable_kernel	Whether kernel is relocatable or not
0235/1	2.10+	min_alignment		Minimum alignment, as a power of two
0236/2	N/A	pad3			Unused
0236/2	2.12+	xloadflags		Boot protocol option flags
0238/4	2.06+	cmdline_size		Maximum size of the kernel command line
023C/4	2.07+	hardware_subarch	Hardware subarchitecture
0240/8	2.07+	hardware_subarch_data	Subarchitecture-specific data
@@ -582,6 +586,27 @@ Protocol:	2.10+
  misaligned kernel.  Therefore, a loader should typically try each
  power-of-two alignment from kernel_alignment down to this alignment.

Field name:	xloadflags
Type:		read
Offset/size:	0x236/2
Protocol:	2.12+

  This field is a bitmask.

  Bit 0 (read):	XLF_KERNEL_64
	- If 1, this kernel has the legacy 64-bit entry point at 0x200.

  Bit 1 (read):	XLF_CAN_BE_LOADED_ABOVE_4G
	- If 1, kernel/boot_params/cmdline/ramdisk can be above 4G.

  Bit 2 (read):	XLF_EFI_HANDOVER_32
	- If 1, the kernel supports the 32-bit EFI handoff entry point
	  given at handover_offset.

  Bit 3 (read):	XLF_EFI_HANDOVER_64
	- If 1, the kernel supports the 64-bit EFI handoff entry point
	  given at handover_offset + 0x200.
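A boot loader can consult these bits before deciding where to place the kernel, boot_params, command line and ramdisk. The fragment below is a minimal illustrative sketch of such a check on the loader side; the XLF_* values mirror the definitions added to asm/bootparam.h by this patch, while the helper function itself is hypothetical:

	#include <stdint.h>
	#include <stdbool.h>

	#define XLF_KERNEL_64			(1 << 0)
	#define XLF_CAN_BE_LOADED_ABOVE_4G	(1 << 1)
	#define XLF_EFI_HANDOVER_32		(1 << 2)
	#define XLF_EFI_HANDOVER_64		(1 << 3)

	/* xloadflags sits at offset 0x236 of the setup header (protocol 2.12+). */
	static bool may_load_above_4g(uint16_t boot_protocol_version, uint16_t xloadflags)
	{
		if (boot_protocol_version < 0x020c)	/* field absent before 2.12 */
			return false;
		return xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G;
	}

A loader that cannot honour a set bit simply falls back to the traditional below-4G placement.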

Field name:	cmdline_size
Type:		read
Offset/size:	0x238/4
@@ -1029,6 +1054,44 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
address of the struct boot_params; %ebp, %edi and %ebx must be zero.

**** 64-bit BOOT PROTOCOL

For machines with 64-bit CPUs and a 64-bit kernel, a 64-bit bootloader
can be used, and a 64-bit boot protocol is needed.

In the 64-bit boot protocol, the first step in loading a Linux kernel
should be to set up the boot parameters (struct boot_params,
traditionally known as the "zero page"). The memory for struct boot_params
can be allocated anywhere (even above 4G) and must be initialized to all zero.
Then, the setup header at offset 0x01f1 of the kernel image should be
loaded into struct boot_params and examined. The end of the setup header
can be calculated as follows:

	0x0202 + byte value at offset 0x0201

In addition to reading/modifying/writing the setup header of the struct
boot_params as in the 16-bit boot protocol, the boot loader should
also fill the additional fields of the struct boot_params as described
in zero-page.txt.

After setting up the struct boot_params, the boot loader can load the
64-bit kernel in the same way as in the 16-bit boot protocol, but the
kernel may be loaded above 4G.

In the 64-bit boot protocol, the kernel is started by jumping to the
64-bit kernel entry point, which is the start address of the loaded
64-bit kernel plus 0x200.

At entry, the CPU must be in 64-bit mode with paging enabled.
The range of setup_header.init_size bytes from the start address of the
loaded kernel, together with the zero page and the command line buffer,
must be identity mapped; a GDT must be loaded with the descriptors for
selectors __BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be
4G flat segments; __BOOT_CS must have execute/read permission, and __BOOT_DS
must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
must be __BOOT_DS; interrupts must be disabled; %rsi must hold the base
address of the struct boot_params.
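Putting the steps above together, a 64-bit loader might perform roughly the following. This is an illustrative sketch only, not code from this patch; the function and variable names are hypothetical, and only the offsets quoted above are assumed:

	#include <stdint.h>
	#include <string.h>

	/* Copy the setup header out of the bzImage into a freshly zeroed
	 * boot_params ("zero page") that may live anywhere, even above 4G. */
	static void setup_zero_page(const uint8_t *image, uint8_t *zero_page)
	{
		/* End of the setup header: 0x0202 + byte value at offset 0x0201. */
		size_t hdr_end = 0x0202 + image[0x0201];

		memset(zero_page, 0, 4096);
		memcpy(zero_page + 0x1f1, image + 0x1f1, hdr_end - 0x1f1);

		/* ...then fill the remaining boot_params fields described in
		 * zero-page.txt (e820 map, ext_* high words, etc.), set up the
		 * identity-mapped page table, GDT and segments, and jump to
		 * the load address + 0x200 with %rsi pointing at zero_page. */
	}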

**** EFI HANDOVER PROTOCOL

This protocol allows boot loaders to defer initialisation to the EFI
@@ -19,6 +19,9 @@ Offset	Proto	Name		Meaning
090/010	ALL	hd1_info	hd1 disk parameter, OBSOLETE!!
0A0/010	ALL	sys_desc_table	System description table (struct sys_desc_table)
0B0/010	ALL	olpc_ofw_header	OLPC's OpenFirmware CIF and friends
0C0/004	ALL	ext_ramdisk_image ramdisk_image high 32bits
0C4/004	ALL	ext_ramdisk_size  ramdisk_size high 32bits
0C8/004	ALL	ext_cmd_line_ptr  cmd_line_ptr high 32bits
140/080	ALL	edid_info	Video mode setup (struct edid_info)
1C0/020	ALL	efi_info	EFI 32 information (struct efi_info)
1E0/004	ALL	alk_mem_k	Alternative mem check, in KB
@@ -27,6 +30,7 @@ Offset	Proto	Name		Meaning
1E9/001	ALL	eddbuf_entries	Number of entries in eddbuf (below)
1EA/001	ALL	edd_mbr_sig_buf_entries	Number of entries in edd_mbr_sig_buffer
				(below)
1EF/001	ALL	sentinel	Used to detect broken bootloaders
290/040	ALL	edd_mbr_sig_buffer EDD MBR signatures
2D0/A00	ALL	e820_map	E820 memory map table
				(array of struct e820entry)
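The three ext_* entries added above carry the high 32 bits of values whose low halves remain in the setup header, so a full 64-bit pointer is recovered with a shift-and-or. This mirrors the get_cmd_line_ptr()/get_ramdisk_image() helpers introduced later in this commit; the standalone helper below is only illustrative:

	#include <stdint.h>

	static uint64_t ext_combine(uint32_t lo, uint32_t ext_hi)
	{
		return ((uint64_t)ext_hi << 32) | lo;
	}

	/* e.g. cmd_line_ptr = ext_combine(hdr.cmd_line_ptr, ext_cmd_line_ptr); */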
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void)

	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);

	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
		panic("Cannot allocate SWIOTLB buffer");

	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
}
@@ -2021,6 +2021,16 @@ static void __init patch_tlb_miss_handler_bitmap(void)
	flushi(&valid_addr_bitmap_insn[0]);
}

static void __init register_page_bootmem_info(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int i;

	for_each_online_node(i)
		if (NODE_DATA(i)->node_spanned_pages)
			register_page_bootmem_info_node(NODE_DATA(i));
#endif
}
void __init mem_init(void)
{
	unsigned long codepages, datapages, initpages;
@@ -2038,20 +2048,8 @@ void __init mem_init(void)

	high_memory = __va(last_valid_pfn << PAGE_SHIFT);

#ifdef CONFIG_NEED_MULTIPLE_NODES
	{
		int i;
		for_each_online_node(i) {
			if (NODE_DATA(i)->node_spanned_pages != 0) {
				totalram_pages +=
					free_all_bootmem_node(NODE_DATA(i));
			}
		}
		totalram_pages += free_low_memory_core_early(MAX_NUMNODES);
	}
#else
	register_page_bootmem_info();
	totalram_pages = free_all_bootmem();
#endif

	/* We subtract one to account for the mem_map_zero page
	 * allocated below.
@@ -285,16 +285,26 @@ struct biosregs {
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);

/* cmdline.c */
int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
	return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	if (cmd_line_ptr >= 0x100000)
		return -1;      /* inaccessible */

	return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
}

static inline int cmdline_find_option_bool(const char *option)
{
	return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	if (cmd_line_ptr >= 0x100000)
		return -1;      /* inaccessible */

	return __cmdline_find_option_bool(cmd_line_ptr, option);
}

@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
 * Returns the length of the argument (regardless of if it was
 * truncated to fit in the buffer), or -1 on not found.
 */
int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
{
	addr_t cptr;
	char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
		st_bufcpy	/* Copying this to buffer */
	} state = st_wordstart;

	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
		return -1;      /* No command line, or inaccessible */
	if (!cmdline_ptr)
		return -1;      /* No command line */

	cptr = cmdline_ptr & 0xf;
	set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
 * Returns the position of that option (starts counting with 1)
 * or 0 on not found
 */
int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
{
	addr_t cptr;
	char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
		st_wordskip,	/* Miscompare, skip */
	} state = st_wordstart;

	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
		return -1;      /* No command line, or inaccessible */
	if (!cmdline_ptr)
		return -1;      /* No command line */

	cptr = cmdline_ptr & 0xf;
	set_fs(cmdline_ptr >> 4);
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
	return *((char *)(fs + addr));
}
#include "../cmdline.c"
static unsigned long get_cmd_line_ptr(void)
{
	unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;

	cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;

	return cmd_line_ptr;
}
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
	return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
	return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
}
int cmdline_find_option_bool(const char *option)
{
	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
	return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}

#endif
@@ -37,6 +37,12 @@
	__HEAD
	.code32
ENTRY(startup_32)
	/*
	 * 32bit entry is 0 and it is ABI so immutable!
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk) ramdisk, zero_page, command line
	 * all need to be under the 4G limit.
	 */
	cld
	/*
	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl	$0x20, %eax
	ltr	%ax

	/*
	 * Setup for the jump to 64bit mode
	 *
@@ -176,28 +188,18 @@ ENTRY(startup_32)
	lret
ENDPROC(startup_32)

no_longmode:
	/* This isn't an x86-64 CPU so hang */
1:
	hlt
	jmp	1b

#include "../../kernel/verify_cpu.S"

	/*
	 * Be careful here startup_64 needs to be at a predictable
	 * address so I can export it in an ELF header.  Bootloaders
	 * should look at the ELF header to find this address, as
	 * it may change in the future.
	 */
	.code64
	.org 0x200
ENTRY(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader. If we come here from a bootloader we depend on
	 * an identity mapped page table being provided that maps our
	 * entire text+data+bss and hopefully all of memory.
	 * 64bit bootloader.
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page, command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */
#ifdef CONFIG_EFI_STUB
	/*
@@ -247,9 +249,6 @@ preferred_addr:
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs
	lldt	%ax
	movl	$0x20, %eax
	ltr	%ax

	/*
	 * Compute the decompressed kernel start address.  It is where
@@ -349,6 +348,15 @@ relocated:
	 */
	jmp	*%rbp

	.code32
no_longmode:
	/* This isn't an x86-64 CPU so hang */
1:
	hlt
	jmp	1b

#include "../../kernel/verify_cpu.S"

	.data
gdt:
	.word	gdt_end - gdt
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
{
	real_mode = rmode;

	sanitize_boot_params(real_mode);

	if (real_mode->screen_info.orig_video_mode == 7) {
		vidmem = (char *) 0xb0000;
		vidport = 0x3b4;
@@ -18,6 +18,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
#include <asm/bootparam_utils.h>

#define BOOT_BOOT_H
#include "../ctype.h"
@@ -21,6 +21,7 @@
#include <asm/e820.h>
#include <asm/page_types.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include "boot.h"
#include "voffset.h"
#include "zoffset.h"
@@ -255,6 +256,9 @@ section_table:
# header, from the old boot sector.

	.section ".header", "a"
	.globl	sentinel
sentinel:	.byte 0xff, 0xff	/* Used to detect broken loaders */

	.globl	hdr
hdr:
setup_sects:	.byte 0			/* Filled in by build.c */
@@ -279,7 +283,7 @@ _start:
	# Part 2 of the header, from the old setup.S

		.ascii	"HdrS"		# header signature
		.word	0x020b		# header version number (>= 0x0105)
		.word	0x020c		# header version number (>= 0x0105)
					# or else old loadlin-1.5 will fail)
		.globl	realmode_swtch
realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -297,13 +301,7 @@ type_of_loader:	.byte	0		# 0 means ancient bootloader, newer

# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
LOADED_HIGH	= 1			# If set, the kernel is loaded high
CAN_USE_HEAP	= 0x80			# If set, the loader also has set
					# heap_end_ptr to tell how much
					# space behind setup.S can be used for
					# heap purposes.
					# Only the loader knows what is free
		.byte	LOADED_HIGH
		.byte	LOADED_HIGH	# The kernel is to be loaded high

setup_move_size: .word	0x8000		# size to move, when setup is not
					# loaded at 0x90000. We will move setup
@@ -369,7 +367,31 @@ relocatable_kernel:	.byte 1
relocatable_kernel:	.byte 0
#endif
min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment
pad3:			.word 0

xloadflags:
#ifdef CONFIG_X86_64
# define XLF0 XLF_KERNEL_64			/* 64-bit kernel */
#else
# define XLF0 0
#endif

#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
   /* kernel/boot_param/ramdisk could be loaded above 4g */
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
#else
# define XLF1 0
#endif

#ifdef CONFIG_EFI_STUB
# ifdef CONFIG_X86_64
#  define XLF23 XLF_EFI_HANDOVER_64		/* 64-bit EFI handover ok */
# else
#  define XLF23 XLF_EFI_HANDOVER_32		/* 32-bit EFI handover ok */
# endif
#else
# define XLF23 0
#endif
			.word XLF0 | XLF1 | XLF23

cmdline_size:	.long	COMMAND_LINE_SIZE-1	#length of the command line,
						#added with boot protocol
@@ -397,8 +419,13 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size:		.long INIT_SIZE		# kernel initialization size
handover_offset:	.long 0x30		# offset to the handover
handover_offset:
#ifdef CONFIG_EFI_STUB
			.long 0x30		# offset to the handover
						# protocol entry point
#else
			.long 0
#endif
# End of setup header #####################################################
@@ -13,7 +13,7 @@ SECTIONS
	.bstext		: { *(.bstext) }
	.bsdata		: { *(.bsdata) }

	. = 497;
	. = 495;
	.header		: { *(.header) }
	.entrytext	: { *(.entrytext) }
	.inittext	: { *(.inittext) }
arch/x86/include/asm/bootparam_utils.h (new file, 38 lines)
@@ -0,0 +1,38 @@
#ifndef _ASM_X86_BOOTPARAM_UTILS_H
#define _ASM_X86_BOOTPARAM_UTILS_H

#include <asm/bootparam.h>

/*
 * This file is included from multiple environments.  Do not
 * add completing #includes to make it standalone.
 */

/*
 * Deal with bootloaders which fail to initialize unknown fields in
 * boot_params to zero.  The list fields in this list are taken from
 * analysis of kexec-tools; if other broken bootloaders initialize a
 * different set of fields we will need to figure out how to disambiguate.
 *
 */
static void sanitize_boot_params(struct boot_params *boot_params)
{
	if (boot_params->sentinel) {
		/*fields in boot_params are not valid, clear them */
		memset(&boot_params->olpc_ofw_header, 0,
		       (char *)&boot_params->alt_mem_k -
		       (char *)&boot_params->olpc_ofw_header);
		memset(&boot_params->kbd_status, 0,
		       (char *)&boot_params->hdr -
		       (char *)&boot_params->kbd_status);
		memset(&boot_params->_pad7[0], 0,
		       (char *)&boot_params->edd_mbr_sig_buffer[0] -
		       (char *)&boot_params->_pad7[0]);
		memset(&boot_params->_pad8[0], 0,
		       (char *)&boot_params->eddbuf[0] -
		       (char *)&boot_params->_pad8[0]);
		memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
	}
}

#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
@@ -1,20 +1,14 @@
#ifndef _ASM_X86_INIT_32_H
#define _ASM_X86_INIT_32_H
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H

#ifdef CONFIG_X86_32
extern void __init early_ioremap_page_table_range_init(void);
#endif
struct x86_mapping_info {
	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
	void *context;			 /* context for alloc_pgt_page */
	unsigned long pmd_flag;		 /* page flag for PMD entry */
	bool kernel_mapping;		 /* kernel mapping or ident mapping */
};

extern void __init zone_sizes_init(void);
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
				unsigned long addr, unsigned long end);

extern unsigned long __init
kernel_physical_mapping_init(unsigned long start,
			     unsigned long end,
			     unsigned long page_size_mask);


extern unsigned long __initdata pgt_buf_start;
extern unsigned long __meminitdata pgt_buf_end;
extern unsigned long __meminitdata pgt_buf_top;

#endif /* _ASM_X86_INIT_32_H */
#endif /* _ASM_X86_INIT_H */
@@ -48,11 +48,11 @@
# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
#else
/* Maximum physical address we can use pages from */
# define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
# define KEXEC_SOURCE_MEMORY_LIMIT      (MAXMEM-1)
/* Maximum address we can reach in physical address mode */
# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
# define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)

/* Allocate one page for the pdp and the second for the code */
# define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)
@@ -54,8 +54,6 @@ static inline int numa_cpu_node(int cpu)

#ifdef CONFIG_X86_32
# include <asm/numa_32.h>
#else
# include <asm/numa_64.h>
#endif

#ifdef CONFIG_NUMA
@@ -1,6 +0,0 @@
#ifndef _ASM_X86_NUMA_64_H
#define _ASM_X86_NUMA_64_H

extern unsigned long numa_free_all_bootmem(void);

#endif /* _ASM_X86_NUMA_64_H */
@@ -17,6 +17,10 @@

struct page;

#include <linux/range.h>
extern struct range pfn_mapped[];
extern int nr_pfn_mapped;

static inline void clear_user_page(void *page, unsigned long vaddr,
				   struct page *pg)
{
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
}

bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);

extern unsigned long init_memory_mapping(unsigned long start,
					 unsigned long end);

@@ -616,6 +616,8 @@ static inline int pgd_none(pgd_t pgd)
#ifndef __ASSEMBLY__

extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);

/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_PGTABLE_64_DEFS_H
#define _ASM_X86_PGTABLE_64_DEFS_H

#include <asm/sparsemem.h>

#ifndef __ASSEMBLY__
#include <linux/types.h>

@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_END      _AC(0xffffffffff000000, UL)
#define MODULES_LEN   (MODULES_END - MODULES_VADDR)

#define EARLY_DYNAMIC_PAGE_TABLES	64

#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_init(void);
#else
@@ -731,6 +731,7 @@ extern void enable_sep_cpu(void);
extern int sysenter_setup(void);

extern void early_trap_init(void);
void early_trap_pf_init(void);

/* Defined in head.S */
extern struct desc_ptr		early_gdt_descr;
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[];
extern unsigned char secondary_startup_64[];
#endif

extern void __init setup_real_mode(void);
void reserve_real_mode(void);
void setup_real_mode(void);

#endif /* _ARCH_X86_REALMODE_H */
@@ -68,17 +68,6 @@ struct x86_init_oem {
	void (*banner)(void);
};

/**
 * struct x86_init_mapping - platform specific initial kernel pagetable setup
 * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
 *
 * For more details on the purpose of this hook, look in
 * init_memory_mapping and the commit that added it.
 */
struct x86_init_mapping {
	void (*pagetable_reserve)(u64 start, u64 end);
};

/**
 * struct x86_init_paging - platform specific paging functions
 * @pagetable_init:	platform specific paging initialization call to setup
@@ -136,7 +125,6 @@ struct x86_init_ops {
	struct x86_init_mpparse		mpparse;
	struct x86_init_irqs		irqs;
	struct x86_init_oem		oem;
	struct x86_init_mapping		mapping;
	struct x86_init_paging		paging;
	struct x86_init_timers		timers;
	struct x86_init_iommu		iommu;
@@ -1,6 +1,31 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H

/* setup_data types */
#define SETUP_NONE			0
#define SETUP_E820_EXT			1
#define SETUP_DTB			2
#define SETUP_PCI			3

/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000

/* loadflags */
#define LOADED_HIGH	(1<<0)
#define QUIET_FLAG	(1<<5)
#define KEEP_SEGMENTS	(1<<6)
#define CAN_USE_HEAP	(1<<7)

/* xloadflags */
#define XLF_KERNEL_64			(1<<0)
#define XLF_CAN_BE_LOADED_ABOVE_4G	(1<<1)
#define XLF_EFI_HANDOVER_32		(1<<2)
#define XLF_EFI_HANDOVER_64		(1<<3)

#ifndef __ASSEMBLY__

#include <linux/types.h>
#include <linux/screen_info.h>
#include <linux/apm_bios.h>
@@ -9,12 +34,6 @@
#include <asm/ist.h>
#include <video/edid.h>

/* setup data types */
#define SETUP_NONE			0
#define SETUP_E820_EXT			1
#define SETUP_DTB			2
#define SETUP_PCI			3

/* extensible setup data list node */
struct setup_data {
	__u64 next;
@@ -28,9 +47,6 @@ struct setup_header {
	__u16	root_flags;
	__u32	syssize;
	__u16	ram_size;
#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000
	__u16	vid_mode;
	__u16	root_dev;
	__u16	boot_flag;
@@ -42,10 +58,6 @@ struct setup_header {
	__u16	kernel_version;
	__u8	type_of_loader;
	__u8	loadflags;
#define LOADED_HIGH	(1<<0)
#define QUIET_FLAG	(1<<5)
#define KEEP_SEGMENTS	(1<<6)
#define CAN_USE_HEAP	(1<<7)
	__u16	setup_move_size;
	__u32	code32_start;
	__u32	ramdisk_image;
@@ -58,7 +70,8 @@ struct setup_header {
	__u32	initrd_addr_max;
	__u32	kernel_alignment;
	__u8	relocatable_kernel;
	__u8	_pad2[3];
	__u8	min_alignment;
	__u16	xloadflags;
	__u32	cmdline_size;
	__u32	hardware_subarch;
	__u64	hardware_subarch_data;
@@ -106,7 +119,10 @@ struct boot_params {
	__u8  hd1_info[16];	/* obsolete! */		/* 0x090 */
	struct sys_desc_table sys_desc_table;		/* 0x0a0 */
	struct olpc_ofw_header olpc_ofw_header;		/* 0x0b0 */
	__u8  _pad4[128];				/* 0x0c0 */
	__u32 ext_ramdisk_image;			/* 0x0c0 */
	__u32 ext_ramdisk_size;				/* 0x0c4 */
	__u32 ext_cmd_line_ptr;				/* 0x0c8 */
	__u8  _pad4[116];				/* 0x0cc */
	struct edid_info edid_info;			/* 0x140 */
	struct efi_info efi_info;			/* 0x1c0 */
	__u32 alt_mem_k;				/* 0x1e0 */
@@ -115,7 +131,20 @@ struct boot_params {
	__u8  eddbuf_entries;				/* 0x1e9 */
	__u8  edd_mbr_sig_buf_entries;			/* 0x1ea */
	__u8  kbd_status;				/* 0x1eb */
	__u8  _pad6[5];					/* 0x1ec */
	__u8  _pad5[3];					/* 0x1ec */
	/*
	 * The sentinel is set to a nonzero value (0xff) in header.S.
	 *
	 * A bootloader is supposed to only take setup_header and put
	 * it into a clean boot_params buffer.  If it turns out that
	 * it is clumsy or too generous with the buffer, it most
	 * probably will pick up the sentinel variable too.  The fact
	 * that this variable then is still 0xff will let kernel
	 * know that some variables in boot_params are invalid and
	 * kernel should zero out certain portions of boot_params.
	 */
	__u8  sentinel;					/* 0x1ef */
	__u8  _pad6[1];					/* 0x1f0 */
	struct setup_header hdr;    /* setup header */	/* 0x1f1 */
	__u8  _pad7[0x290-0x1f1-sizeof(struct setup_header)];
	__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
@@ -134,6 +163,6 @@ enum {
	X86_NR_SUBARCHS,
};


#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_BOOTPARAM_H */
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);

#ifdef CONFIG_X86_64
# include <asm/proto.h>
# include <asm/numa_64.h>
#endif				/* X86 */

#define BAD_MADT_ENTRY(entry, end) (					    \
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
	aper_base	= info.aper_base;
	end_pfn		= (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);

	if (end_pfn > max_low_pfn_mapped) {
		start_pfn = (aper_base>>PAGE_SHIFT);
	start_pfn = PFN_DOWN(aper_base);
	if (!pfn_range_is_mapped(start_pfn, end_pfn))
		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	}

	pr_info("PCI-DMA: using GART IOMMU.\n");
	iommu_size = check_iommu_size(info.aper_base, aper_size);
@@ -12,7 +12,6 @@
#include <asm/pci-direct.h>

#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif
@@ -685,12 +684,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
	 * benefit in doing so.
	 */
	if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
		unsigned long pfn = tseg >> PAGE_SHIFT;

		printk(KERN_DEBUG "tseg: %010llx\n", tseg);
		if ((tseg>>PMD_SHIFT) <
				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
			((tseg>>PMD_SHIFT) <
				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
				(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
		if (pfn_range_is_mapped(pfn, pfn + 1))
			set_memory_4k((unsigned long)__va(tseg), 1);
	}
}
@@ -17,7 +17,6 @@

#ifdef CONFIG_X86_64
#include <linux/topology.h>
#include <asm/numa_64.h>
#endif

#include "cpu.h"
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
}
early_param("mem", parse_memopt);

static int __init parse_memmap_opt(char *p)
static int __init parse_memmap_one(char *p)
{
	char *oldp;
	u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)

	return *p == '\0' ? 0 : -EINVAL;
}
static int __init parse_memmap_opt(char *str)
{
	while (str) {
		char *k = strchr(str, ',');

		if (k)
			*k++ = 0;

		parse_memmap_one(str);
		str = k;
	}

	return 0;
}
early_param("memmap", parse_memmap_opt);

void __init finish_e820_parsing(void)
@@ -18,6 +18,7 @@
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
#include <asm/tlbflush.h>
#include <asm/bootparam_utils.h>

static void __init i386_default_early_setup(void)
{
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void)

void __init i386_start_kernel(void)
{
	memblock_reserve(__pa_symbol(_text),
			 (unsigned long)__bss_stop - (unsigned long)_text);

#ifdef CONFIG_BLK_DEV_INITRD
	/* Reserve INITRD */
	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
		/* Assume only end is not page aligned */
		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
	}
#endif
	sanitize_boot_params(&boot_params);

	/* Call the subarch specific early setup function */
	switch (boot_params.hdr.hardware_subarch) {
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void)
		break;
	}

	/*
	 * At this point everything still needed from the boot loader
	 * or BIOS or kernel text should be early reserved or marked not
	 * RAM in e820. All other memory is free game.
	 */

	start_kernel();
}
@@ -25,12 +25,83 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>

static void __init zap_identity_mappings(void)
/*
 * Manage page tables very early on.
 */
extern pgd_t early_level4_pgt[PTRS_PER_PGD];
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt = 2;

/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
	pgd_t *pgd = pgd_offset_k(0UL);
	pgd_clear(pgd);
	__flush_tlb_all();
	unsigned long i;

	for (i = 0; i < PTRS_PER_PGD-1; i++)
		early_level4_pgt[i].pgd = 0;

	next_early_pgt = 0;

	write_cr3(__pa(early_level4_pgt));
}

/* Create a new PMD entry */
int __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	unsigned long i;
	pgdval_t pgd, *pgd_p;
	pudval_t pud, *pud_p;
	pmdval_t pmd, *pmd_p;

	/* Invalid address or early pgt is done ?  */
	if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
		return -1;

again:
	pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
	pgd = *pgd_p;

	/*
	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
	if (pgd)
		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		for (i = 0; i < PTRS_PER_PUD; i++)
			pud_p[i] = 0;
		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;

	if (pud)
		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
		for (i = 0; i < PTRS_PER_PMD; i++)
			pmd_p[i] = 0;
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
	pmd_p[pmd_index(address)] = pmd;

	return 0;
}

/* Don't add a printk in there. printk relies on the PDA which is not initialized
@@ -41,13 +112,25 @@ static void __init clear_bss(void)
	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
}

static unsigned long get_cmd_line_ptr(void)
{
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;

	return cmd_line_ptr;
}

static void __init copy_bootdata(char *real_mode_data)
{
	char * command_line;
	unsigned long cmd_line_ptr;

	memcpy(&boot_params, real_mode_data, sizeof boot_params);
	if (boot_params.hdr.cmd_line_ptr) {
		command_line = __va(boot_params.hdr.cmd_line_ptr);
	sanitize_boot_params(&boot_params);
	cmd_line_ptr = get_cmd_line_ptr();
	if (cmd_line_ptr) {
		command_line = __va(cmd_line_ptr);
		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
	}
}
@@ -70,14 +153,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
				(__START_KERNEL & PGDIR_MASK)));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	/* clear bss before set_intr_gate with early_idt_handler */
	clear_bss();

	/* Make NULL pointers segfault */
	zap_identity_mappings();

	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;

	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
		set_intr_gate(i, &early_idt_handlers[i]);
@@ -87,37 +168,25 @@ void __init x86_64_start_kernel(char * real_mode_data)
	}
	load_idt((const struct desc_ptr *)&idt_descr);

	copy_bootdata(__va(real_mode_data));

	if (console_loglevel == 10)
		early_printk("Kernel alive\n");

	clear_page(init_level4_pgt);
	/* set init_level4_pgt kernel high mapping*/
	init_level4_pgt[511] = early_level4_pgt[511];

	x86_64_start_reservations(real_mode_data);
}

void __init x86_64_start_reservations(char *real_mode_data)
{
	copy_bootdata(__va(real_mode_data));

	memblock_reserve(__pa_symbol(_text),
			 (unsigned long)__bss_stop - (unsigned long)_text);

#ifdef CONFIG_BLK_DEV_INITRD
	/* Reserve INITRD */
	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
		/* Assume only end is not page aligned */
		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
		unsigned long ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
	}
#endif
	/* version is always not zero if it is copied */
	if (!boot_params.hdr.version)
		copy_bootdata(__va(real_mode_data));

	reserve_ebda_region();

	/*
	 * At this point everything still needed from the boot loader
	 * or BIOS or kernel text should be early reserved or marked not
	 * RAM in e820. All other memory is free game.
	 */

	start_kernel();
}
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
	.code64
	.globl startup_64
startup_64:

	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
	 * and someone has loaded an identity mapped page table
	 * for us.  These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 * %rsi holds a physical pointer to real_mode_data.
	 *
	 * We come here either directly from a 64bit bootloader, or from
	 * arch/x86_64/boot/compressed/head.S.
@@ -66,7 +65,8 @@ startup_64:
	 * tables and then reload them.
	 */

	/* Compute the delta between the address I am compiled to run at and the
	/*
	 * Compute the delta between the address I am compiled to run at and the
	 * address I am actually running at.
	 */
	leaq	_text(%rip), %rbp
@@ -78,45 +78,62 @@ startup_64:
	testl	%eax, %eax
	jnz	bad_address

	/* Is the address too large? */
	leaq	_text(%rip), %rdx
	movq	$PGDIR_SIZE, %rax
	cmpq	%rax, %rdx
	jae	bad_address

	/* Fixup the physical addresses in the page table
	/*
	 * Is the address too large?
	 */
	addq	%rbp, init_level4_pgt + 0(%rip)
	addq	%rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
	addq	%rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
	leaq	_text(%rip), %rax
	shrq	$MAX_PHYSMEM_BITS, %rax
	jnz	bad_address

	addq	%rbp, level3_ident_pgt + 0(%rip)
	/*
	 * Fixup the physical addresses in the page table
	 */
	addq	%rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)

	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)

	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)

	/* Add an Identity mapping if I am above 1G */
	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */
	leaq	_text(%rip), %rdi
	andq	$PMD_PAGE_MASK, %rdi
	leaq	early_level4_pgt(%rip), %rbx

	movq	%rdi, %rax
	shrq	$PGDIR_SHIFT, %rax

	leaq	(4096 + _KERNPG_TABLE)(%rbx), %rdx
	movq	%rdx, 0(%rbx,%rax,8)
	movq	%rdx, 8(%rbx,%rax,8)

	addq	$4096, %rdx
	movq	%rdi, %rax
	shrq	$PUD_SHIFT, %rax
	andq	$(PTRS_PER_PUD - 1), %rax
	jz	ident_complete

	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
	leaq	level3_ident_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)
	andl	$(PTRS_PER_PUD-1), %eax
	movq	%rdx, (4096+0)(%rbx,%rax,8)
	movq	%rdx, (4096+8)(%rbx,%rax,8)

	addq	$8192, %rbx
	movq	%rdi, %rax
	shrq	$PMD_SHIFT, %rax
	andq	$(PTRS_PER_PMD - 1), %rax
	leaq	__PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
	leaq	level2_spare_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)
ident_complete:
	shrq	$PMD_SHIFT, %rdi
	addq	$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
	leaq	(_end - 1)(%rip), %rcx
	shrq	$PMD_SHIFT, %rcx
	subq	%rdi, %rcx
	incl	%ecx

1:
	andq	$(PTRS_PER_PMD - 1), %rdi
	movq	%rax, (%rbx,%rdi,8)
	incq	%rdi
	addq	$PMD_SIZE, %rax
	decl	%ecx
	jnz	1b

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
@@ -124,7 +141,6 @@ ident_complete:
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 */

	leaq	level2_kernel_pgt(%rip), %rdi
	leaq	4096(%rdi), %r8
	/* See if it is a valid page table entry */
@@ -139,17 +155,14 @@ ident_complete:
	/* Fixup phys_base */
	addq	%rbp, phys_base(%rip)

	/* Due to ENTRY(), sometimes the empty space gets filled with
	 * zeros. Better take a jmp than relying on empty space being
	 * filled with 0x90 (nop)
	 */
	jmp secondary_startup_64
	movq	$(early_level4_pgt - __START_KERNEL_map), %rax
	jmp 1f
ENTRY(secondary_startup_64)
	/*
	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
	 * and someone has loaded a mapped page table.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 * %rsi holds a physical pointer to real_mode_data.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64)
	 * after the boot processor executes this code.
	 */

	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
1:

	/* Enable PAE mode and PGE */
	movl	$(X86_CR4_PAE | X86_CR4_PGE), %eax
	movq	%rax, %cr4
	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
	movq	%rcx, %cr4

	/* Setup early boot stage 4 level pagetables. */
	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
	addq	phys_base(%rip), %rax
	movq	%rax, %cr3

@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64)
	movq	%rax, %cr0

	/* Setup a boot time stack */
	movq stack_start(%rip),%rsp
	movq stack_start(%rip), %rsp

	/* zero EFLAGS after setting rsp */
	pushq $0
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64)
	movl	initial_gs+4(%rip),%edx
	wrmsr

	/* esi is pointer to real mode structure with interesting info.
	/* rsi is pointer to real mode structure with interesting info.
	   pass it to C */
	movl	%esi, %edi
	movq	%rsi, %rdi

	/* Finally jump to run C code and to be on real kernel address
	 * Since we are running on identity-mapped space we have to jump
	 * to the full 64bit address, this is only possible as indirect
	 * jump.  In addition we need to ensure %cs is set so we make this
	 * a far return.
	 *
	 * Note: do not change to far jump indirect with 64bit offset.
	 *
	 * AMD does not support far jump indirect with 64bit offset.
	 * AMD64 Architecture Programmer's Manual, Volume 3: states only
	 *	JMP FAR mem16:16 FF /5 Far jump indirect,
	 *		with the target specified by a far pointer in memory.
	 *	JMP FAR mem16:32 FF /5 Far jump indirect,
	 *		with the target specified by a far pointer in memory.
	 *
	 * Intel64 does support 64bit offset.
	 * Software Developer Manual Vol 2: states:
	 *	FF /5 JMP m16:16 Jump far, absolute indirect,
	 *		address given in m16:16
	 *	FF /5 JMP m16:32 Jump far, absolute indirect,
	 *		address given in m16:32.
	 *	REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
	 *		address given in m16:64.
	 */
	movq	initial_code(%rip),%rax
	pushq	$0		# fake return address to stop unwinder
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0)

	/* SMP bootup changes these two */
	__REFDATA
	.align	8
	ENTRY(initial_code)
	.balign	8
	GLOBAL(initial_code)
	.quad	x86_64_start_kernel
	ENTRY(initial_gs)
	GLOBAL(initial_gs)
	.quad	INIT_PER_CPU_VAR(irq_stack_union)

	ENTRY(stack_start)
	GLOBAL(stack_start)
	.quad  init_thread_union+THREAD_SIZE-8
	.word  0
	__FINITDATA
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0)
bad_address:
	jmp bad_address

	.section ".init.text","ax"
	__INIT
	.globl early_idt_handlers
early_idt_handlers:
	# 104(%rsp) %rflags
@@ -321,14 +354,22 @@ ENTRY(early_idt_handler)
	pushq %r11		# 0(%rsp)

	cmpl $__KERNEL_CS,96(%rsp)
	jne 10f
	jne 11f

	cmpl $14,72(%rsp)	# Page fault?
	jnz 10f
	GET_CR2_INTO(%rdi)	# can clobber any volatile register if pv
	call early_make_pgtable
	andl %eax,%eax
	jz 20f			# All good

10:
	leaq 88(%rsp),%rdi	# Pointer to %rip
	call early_fixup_exception
	andl %eax,%eax
	jnz 20f			# Found an exception entry

10:
11:
#ifdef CONFIG_EARLY_PRINTK
	GET_CR2_INTO(%r9)	# can clobber any volatile register if pv
	movl 80(%rsp),%r8d	# error code
@@ -350,7 +391,7 @@ ENTRY(early_idt_handler)
1:	hlt
	jmp 1b

20:	# Exception table entry found
20:	# Exception table entry found or page table generated
	popq %r11
	popq %r10
	popq %r9
@@ -364,6 +405,8 @@ ENTRY(early_idt_handler)
	decl early_recursion_flag(%rip)
	INTERRUPT_RETURN

	__INITDATA

	.balign 4
early_recursion_flag:
	.long 0
@@ -374,11 +417,10 @@ early_idt_msg:
early_idt_ripmsg:
	.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
	.previous

#define NEXT_PAGE(name) \
	.balign	PAGE_SIZE; \
ENTRY(name)
GLOBAL(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)			\
@@ -388,24 +430,37 @@ ENTRY(name)
	i = i + 1 ;					\
	.endr

	.data
	/*
	 * This default setting generates an ident mapping at address 0x100000
	 * and a mapping for the kernel that precisely maps virtual address
	 * 0xffffffff80000000 to physical address 0x000000. (always using
	 * 2Mbyte large pages provided by PAE mode)
	 */
NEXT_PAGE(init_level4_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	__INITDATA
NEXT_PAGE(early_level4_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(early_dynamic_pgts)
	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0

	.data

#ifndef CONFIG_XEN
NEXT_PAGE(init_level4_pgt)
	.fill	512,8,0
#else
NEXT_PAGE(init_level4_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org	init_level4_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	511,8,0
	.fill	511, 8, 0
NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif

NEXT_PAGE(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
@@ -413,21 +468,6 @@ NEXT_PAGE(level3_kernel_pgt)
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
	.fill	506,8,0
	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
	.fill	5,8,0

NEXT_PAGE(level1_fixmap_pgt)
	.fill	512,8,0

NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)

NEXT_PAGE(level2_kernel_pgt)
	/*
	 * 512 MB kernel mapping. We spend a full page on this pagetable
@@ -442,11 +482,16 @@ NEXT_PAGE(level2_kernel_pgt)
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
		KERNEL_IMAGE_SIZE/PMD_SIZE)

NEXT_PAGE(level2_spare_pgt)
	.fill   512, 8, 0
NEXT_PAGE(level2_fixmap_pgt)
	.fill	506,8,0
	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
	.fill	5,8,0

NEXT_PAGE(level1_fixmap_pgt)
	.fill	512,8,0

#undef PMDS
#undef NEXT_PAGE

	.data
	.align 16
@@ -472,6 +517,5 @@ ENTRY(nmi_idt_table)
	.skip IDT_ENTRIES * 16

	__PAGE_ALIGNED_BSS
	.align PAGE_SIZE
ENTRY(empty_zero_page)
NEXT_PAGE(empty_zero_page)
	.skip PAGE_SIZE
@@ -16,125 +16,12 @@
#include <linux/io.h>
#include <linux/suspend.h>

#include <asm/init.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>

static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
				unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;
	struct page *page;
	int result = -ENOMEM;

	addr &= PMD_MASK;
	pgd += pgd_index(addr);
	if (!pgd_present(*pgd)) {
		page = kimage_alloc_control_pages(image, 0);
		if (!page)
			goto out;
		pud = (pud_t *)page_address(page);
		clear_page(pud);
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud)) {
		page = kimage_alloc_control_pages(image, 0);
		if (!page)
			goto out;
		pmd = (pmd_t *)page_address(page);
		clear_page(pmd);
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
	result = 0;
out:
	return result;
}

static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
	unsigned long end_addr;

	addr &= PAGE_MASK;
	end_addr = addr + PUD_SIZE;
	while (addr < end_addr) {
		set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
		addr += PMD_SIZE;
	}
}

static int init_level3_page(struct kimage *image, pud_t *level3p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long end_addr;
	int result;

	result = 0;
	addr &= PAGE_MASK;
	end_addr = addr + PGDIR_SIZE;
	while ((addr < last_addr) && (addr < end_addr)) {
		struct page *page;
		pmd_t *level2p;

		page = kimage_alloc_control_pages(image, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		level2p = (pmd_t *)page_address(page);
		init_level2_page(level2p, addr);
		set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
		addr += PUD_SIZE;
	}
	/* clear the unused entries */
	while (addr < end_addr) {
		pud_clear(level3p++);
		addr += PUD_SIZE;
	}
out:
	return result;
}


static int init_level4_page(struct kimage *image, pgd_t *level4p,
				unsigned long addr, unsigned long last_addr)
{
	unsigned long end_addr;
	int result;

	result = 0;
	addr &= PAGE_MASK;
	end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
	while ((addr < last_addr) && (addr < end_addr)) {
		struct page *page;
		pud_t *level3p;

		page = kimage_alloc_control_pages(image, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		level3p = (pud_t *)page_address(page);
		result = init_level3_page(image, level3p, addr, last_addr);
		if (result)
			goto out;
		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
		addr += PGDIR_SIZE;
	}
	/* clear the unused entries */
	while (addr < end_addr) {
		pgd_clear(level4p++);
		addr += PGDIR_SIZE;
	}
out:
	return result;
}

static void free_transition_pgtable(struct kimage *image)
{
	free_page((unsigned long)image->arch.pud);
@ -184,22 +71,62 @@ err:
|
||||
return result;
|
||||
}
|
||||
|
||||
static void *alloc_pgt_page(void *data)
|
||||
{
|
||||
struct kimage *image = (struct kimage *)data;
|
||||
struct page *page;
|
||||
void *p = NULL;
|
||||
|
||||
page = kimage_alloc_control_pages(image, 0);
|
||||
if (page) {
|
||||
p = page_address(page);
|
||||
clear_page(p);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
{
|
||||
struct x86_mapping_info info = {
|
||||
.alloc_pgt_page = alloc_pgt_page,
|
||||
.context = image,
|
||||
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
|
||||
};
|
||||
unsigned long mstart, mend;
|
||||
pgd_t *level4p;
|
||||
int result;
|
||||
int i;
|
||||
|
||||
level4p = (pgd_t *)__va(start_pgtable);
|
||||
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
|
||||
if (result)
|
||||
return result;
|
||||
clear_page(level4p);
|
||||
for (i = 0; i < nr_pfn_mapped; i++) {
|
||||
mstart = pfn_mapped[i].start << PAGE_SHIFT;
|
||||
mend = pfn_mapped[i].end << PAGE_SHIFT;
|
||||
|
||||
result = kernel_ident_mapping_init(&info,
|
||||
level4p, mstart, mend);
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* image->start may be outside 0 ~ max_pfn, for example when
|
||||
* jump back to original kernel from kexeced kernel
|
||||
* segments' mem ranges could be outside 0 ~ max_pfn,
* for example when jumping back to the original kernel from the kexeced kernel,
* or when the first kernel is booted with a user mem map and the second kernel
* could be loaded out of that range.
|
||||
*/
|
||||
result = init_one_level2_page(image, level4p, image->start);
|
||||
if (result)
|
||||
return result;
|
||||
for (i = 0; i < image->nr_segments; i++) {
|
||||
mstart = image->segment[i].mem;
|
||||
mend = mstart + image->segment[i].memsz;
|
||||
|
||||
result = kernel_ident_mapping_init(&info,
|
||||
level4p, mstart, mend);
|
||||
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
|
||||
return init_transition_pgtable(image, level4p);
|
||||
}
|
||||
|
||||
|
@ -108,17 +108,16 @@
|
||||
#include <asm/topology.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/amd_nb.h>
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/numa_64.h>
|
||||
#endif
|
||||
#include <asm/mce.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/prom.h>
|
||||
|
||||
/*
|
||||
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
|
||||
* The direct mapping extends to max_pfn_mapped, so that we can directly access
|
||||
* apertures, ACPI and other tables without having to play with fixmaps.
|
||||
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
|
||||
* max_pfn_mapped: highest direct mapped pfn over 4GB
|
||||
*
|
||||
* The direct mapping only covers E820_RAM regions, so the ranges and gaps are
|
||||
* represented by pfn_mapped
|
||||
*/
|
||||
unsigned long max_low_pfn_mapped;
|
||||
unsigned long max_pfn_mapped;
|
||||
@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static void __init init_gbpages(void)
|
||||
{
|
||||
if (direct_gbpages && cpu_has_gbpages)
|
||||
printk(KERN_INFO "Using GB pages for direct mapping\n");
|
||||
else
|
||||
direct_gbpages = 0;
|
||||
}
|
||||
#else
|
||||
static inline void init_gbpages(void)
|
||||
{
|
||||
}
|
||||
#ifdef CONFIG_X86_32
|
||||
static void __init cleanup_highmap(void)
|
||||
{
|
||||
}
|
||||
@ -306,27 +294,43 @@ static void __init reserve_brk(void)
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
|
||||
static u64 __init get_ramdisk_image(void)
|
||||
{
|
||||
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
|
||||
ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
|
||||
|
||||
return ramdisk_image;
|
||||
}
|
||||
static u64 __init get_ramdisk_size(void)
|
||||
{
|
||||
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
|
||||
ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
|
||||
|
||||
return ramdisk_size;
|
||||
}
|
||||
|
||||
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
|
||||
static void __init relocate_initrd(void)
|
||||
{
|
||||
/* Assume only end is not page aligned */
|
||||
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
u64 ramdisk_image = get_ramdisk_image();
|
||||
u64 ramdisk_size = get_ramdisk_size();
|
||||
u64 area_size = PAGE_ALIGN(ramdisk_size);
|
||||
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
|
||||
u64 ramdisk_here;
|
||||
unsigned long slop, clen, mapaddr;
|
||||
char *p, *q;
|
||||
|
||||
/* We need to move the initrd down into lowmem */
|
||||
ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
|
||||
PAGE_SIZE);
|
||||
/* We need to move the initrd down into directly mapped mem */
|
||||
ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
|
||||
area_size, PAGE_SIZE);
|
||||
|
||||
if (!ramdisk_here)
|
||||
panic("Cannot find place for new RAMDISK of size %lld\n",
|
||||
ramdisk_size);
|
||||
|
||||
/* Note: this includes all the lowmem currently occupied by
|
||||
/* Note: this includes all the mem currently occupied by
|
||||
the initrd, we rely on that fact to keep the data intact. */
|
||||
memblock_reserve(ramdisk_here, area_size);
|
||||
initrd_start = ramdisk_here + PAGE_OFFSET;
|
||||
@ -336,17 +340,7 @@ static void __init relocate_initrd(void)
|
||||
|
||||
q = (char *)initrd_start;
|
||||
|
||||
/* Copy any lowmem portion of the initrd */
|
||||
if (ramdisk_image < end_of_lowmem) {
|
||||
clen = end_of_lowmem - ramdisk_image;
|
||||
p = (char *)__va(ramdisk_image);
|
||||
memcpy(q, p, clen);
|
||||
q += clen;
|
||||
ramdisk_image += clen;
|
||||
ramdisk_size -= clen;
|
||||
}
|
||||
|
||||
/* Copy the highmem portion of the initrd */
|
||||
/* Copy the initrd */
|
||||
while (ramdisk_size) {
|
||||
slop = ramdisk_image & ~PAGE_MASK;
|
||||
clen = ramdisk_size;
|
||||
@ -360,22 +354,35 @@ static void __init relocate_initrd(void)
|
||||
ramdisk_image += clen;
|
||||
ramdisk_size -= clen;
|
||||
}
|
||||
/* high pages is not converted by early_res_to_bootmem */
|
||||
ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
|
||||
ramdisk_image = get_ramdisk_image();
|
||||
ramdisk_size = get_ramdisk_size();
|
||||
printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
|
||||
" [mem %#010llx-%#010llx]\n",
|
||||
ramdisk_image, ramdisk_image + ramdisk_size - 1,
|
||||
ramdisk_here, ramdisk_here + ramdisk_size - 1);
|
||||
}
|
||||
|
||||
static void __init early_reserve_initrd(void)
|
||||
{
|
||||
/* Assume only end is not page aligned */
|
||||
u64 ramdisk_image = get_ramdisk_image();
|
||||
u64 ramdisk_size = get_ramdisk_size();
|
||||
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
|
||||
|
||||
if (!boot_params.hdr.type_of_loader ||
|
||||
!ramdisk_image || !ramdisk_size)
|
||||
return; /* No initrd provided by bootloader */
|
||||
|
||||
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
|
||||
}
|
||||
static void __init reserve_initrd(void)
|
||||
{
|
||||
/* Assume only end is not page aligned */
|
||||
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||
u64 ramdisk_image = get_ramdisk_image();
|
||||
u64 ramdisk_size = get_ramdisk_size();
|
||||
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
|
||||
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
|
||||
u64 mapped_size;
|
||||
|
||||
if (!boot_params.hdr.type_of_loader ||
|
||||
!ramdisk_image || !ramdisk_size)
|
||||
@ -383,22 +390,18 @@ static void __init reserve_initrd(void)
|
||||
|
||||
initrd_start = 0;
|
||||
|
||||
if (ramdisk_size >= (end_of_lowmem>>1)) {
|
||||
mapped_size = memblock_mem_size(max_pfn_mapped);
|
||||
if (ramdisk_size >= (mapped_size>>1))
|
||||
panic("initrd too large to handle, "
|
||||
"disabling initrd (%lld needed, %lld available)\n",
|
||||
ramdisk_size, end_of_lowmem>>1);
|
||||
}
|
||||
ramdisk_size, mapped_size>>1);
|
||||
|
||||
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
|
||||
ramdisk_end - 1);
|
||||
|
||||
|
||||
if (ramdisk_end <= end_of_lowmem) {
|
||||
/* All in lowmem, easy case */
|
||||
/*
|
||||
* don't need to reserve again, already reserved early
|
||||
* in i386_start_kernel
|
||||
*/
|
||||
if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
|
||||
PFN_DOWN(ramdisk_end))) {
|
||||
/* All are mapped, easy case */
|
||||
initrd_start = ramdisk_image + PAGE_OFFSET;
|
||||
initrd_end = initrd_start + ramdisk_size;
|
||||
return;
|
||||
@ -409,6 +412,9 @@ static void __init reserve_initrd(void)
|
||||
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
|
||||
}
|
||||
#else
|
||||
static void __init early_reserve_initrd(void)
|
||||
{
|
||||
}
|
||||
static void __init reserve_initrd(void)
|
||||
{
|
||||
}
|
||||
@ -419,8 +425,6 @@ static void __init parse_setup_data(void)
|
||||
struct setup_data *data;
|
||||
u64 pa_data;
|
||||
|
||||
if (boot_params.hdr.version < 0x0209)
|
||||
return;
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
while (pa_data) {
|
||||
u32 data_len, map_len;
|
||||
@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void)
|
||||
u64 pa_data;
|
||||
int found = 0;
|
||||
|
||||
if (boot_params.hdr.version < 0x0209)
|
||||
return;
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
while (pa_data) {
|
||||
data = early_memremap(pa_data, sizeof(*data));
|
||||
@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
|
||||
struct setup_data *data;
|
||||
u64 pa_data;
|
||||
|
||||
if (boot_params.hdr.version < 0x0209)
|
||||
return;
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
while (pa_data) {
|
||||
data = early_memremap(pa_data, sizeof(*data));
|
||||
@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
|
||||
/*
|
||||
* Keep the crash kernel below this limit. On 32-bit, earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
* On 64-bit, kexec-tools currently limits us to 896 MiB; increase this
* limit once kexec-tools are fixed.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
|
||||
#else
|
||||
# define CRASH_KERNEL_ADDR_MAX (896 << 20)
|
||||
# define CRASH_KERNEL_ADDR_MAX MAXMEM
|
||||
#endif
|
||||
|
||||
static void __init reserve_crashkernel_low(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
const unsigned long long alignment = 16<<20; /* 16M */
|
||||
unsigned long long low_base = 0, low_size = 0;
|
||||
unsigned long total_low_mem;
|
||||
unsigned long long base;
|
||||
int ret;
|
||||
|
||||
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
|
||||
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
|
||||
&low_size, &base);
|
||||
if (ret != 0 || low_size <= 0)
|
||||
return;
|
||||
|
||||
low_base = memblock_find_in_range(low_size, (1ULL<<32),
|
||||
low_size, alignment);
|
||||
|
||||
if (!low_base) {
|
||||
pr_info("crashkernel low reservation failed - No suitable area found.\n");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
memblock_reserve(low_base, low_size);
|
||||
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
|
||||
(unsigned long)(low_size >> 20),
|
||||
(unsigned long)(low_base >> 20),
|
||||
(unsigned long)(total_low_mem >> 20));
|
||||
crashk_low_res.start = low_base;
|
||||
crashk_low_res.end = low_base + low_size - 1;
|
||||
insert_resource(&iomem_resource, &crashk_low_res);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __init reserve_crashkernel(void)
|
||||
{
|
||||
const unsigned long long alignment = 16<<20; /* 16M */
|
||||
unsigned long long total_mem;
|
||||
unsigned long long crash_size, crash_base;
|
||||
int ret;
|
||||
@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void)
|
||||
|
||||
/* 0 means: find the address automatically */
|
||||
if (crash_base <= 0) {
|
||||
const unsigned long long alignment = 16<<20; /* 16M */
|
||||
|
||||
/*
|
||||
* kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX
|
||||
*/
|
||||
@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void)
|
||||
pr_info("crashkernel reservation failed - No suitable area found.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
unsigned long long start;
|
||||
|
||||
@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void)
|
||||
crashk_res.start = crash_base;
|
||||
crashk_res.end = crash_base + crash_size - 1;
|
||||
insert_resource(&iomem_resource, &crashk_res);
|
||||
|
||||
if (crash_base >= (1ULL<<32))
|
||||
reserve_crashkernel_low();
|
||||
}
|
||||
#else
|
||||
static void __init reserve_crashkernel(void)
|
||||
@ -708,6 +744,27 @@ static void __init trim_bios_range(void)
|
||||
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
||||
}
|
||||
|
||||
/* called before trim_bios_range() to spare extra sanitize */
|
||||
static void __init e820_add_kernel_range(void)
|
||||
{
|
||||
u64 start = __pa_symbol(_text);
|
||||
u64 size = __pa_symbol(_end) - start;
|
||||
|
||||
/*
|
||||
* Complain if .text .data and .bss are not marked as E820_RAM and
|
||||
* attempt to fix it by adding the range. We may have a confused BIOS,
|
||||
* or the user may have used memmap=exactmap or memmap=xxM$yyM to
|
||||
* exclude the kernel range. If we really are running on top of non-RAM,
* we will crash later anyway.
|
||||
*/
|
||||
if (e820_all_mapped(start, start + size, E820_RAM))
|
||||
return;
|
||||
|
||||
pr_warn(".text .data .bss are not marked as E820_RAM!\n");
|
||||
e820_remove_range(start, size, E820_RAM, 0);
|
||||
e820_add_region(start, size, E820_RAM);
|
||||
}
|
||||
|
||||
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
|
||||
|
||||
static int __init parse_reservelow(char *p)
|
||||
@ -752,6 +809,17 @@ static void __init trim_low_memory_range(void)
|
||||
|
||||
void __init setup_arch(char **cmdline_p)
|
||||
{
|
||||
memblock_reserve(__pa_symbol(_text),
|
||||
(unsigned long)__bss_stop - (unsigned long)_text);
|
||||
|
||||
early_reserve_initrd();
|
||||
|
||||
/*
|
||||
* At this point everything still needed from the boot loader
|
||||
* or BIOS or kernel text should be early reserved or marked not
|
||||
* RAM in e820. All other memory is free game.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
|
||||
visws_early_detect();
|
||||
@ -910,6 +978,7 @@ void __init setup_arch(char **cmdline_p)
|
||||
insert_resource(&iomem_resource, &data_resource);
|
||||
insert_resource(&iomem_resource, &bss_resource);
|
||||
|
||||
e820_add_kernel_range();
|
||||
trim_bios_range();
|
||||
#ifdef CONFIG_X86_32
|
||||
if (ppro_with_ram_bug()) {
|
||||
@ -959,6 +1028,8 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
reserve_ibft_region();
|
||||
|
||||
early_alloc_pgt_buf();
|
||||
|
||||
/*
|
||||
* Need to conclude brk, before memblock_x86_fill()
|
||||
* it could use memblock_find_in_range, could overlap with
|
||||
@ -968,7 +1039,7 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
cleanup_highmap();
|
||||
|
||||
memblock.current_limit = get_max_mapped();
|
||||
memblock.current_limit = ISA_END_ADDRESS;
|
||||
memblock_x86_fill();
|
||||
|
||||
/*
|
||||
@ -985,42 +1056,22 @@ void __init setup_arch(char **cmdline_p)
|
||||
setup_bios_corruption_check();
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
|
||||
(max_pfn_mapped<<PAGE_SHIFT) - 1);
|
||||
#endif
|
||||
|
||||
setup_real_mode();
|
||||
reserve_real_mode();
|
||||
|
||||
trim_platform_memory_ranges();
|
||||
trim_low_memory_range();
|
||||
|
||||
init_gbpages();
|
||||
init_mem_mapping();
|
||||
|
||||
/* max_pfn_mapped is updated here */
|
||||
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
||||
max_pfn_mapped = max_low_pfn_mapped;
|
||||
early_trap_pf_init();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (max_pfn > max_low_pfn) {
|
||||
int i;
|
||||
unsigned long start, end;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
setup_real_mode();
|
||||
|
||||
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
|
||||
NULL) {
|
||||
|
||||
end = PFN_PHYS(end_pfn);
|
||||
if (end <= (1UL<<32))
|
||||
continue;
|
||||
|
||||
start = PFN_PHYS(start_pfn);
|
||||
max_pfn_mapped = init_memory_mapping(
|
||||
max((1UL<<32), start), end);
|
||||
}
|
||||
|
||||
/* can we preserve max_low_pfn? */
|
||||
max_low_pfn = max_pfn;
|
||||
}
|
||||
#endif
|
||||
memblock.current_limit = get_max_mapped();
|
||||
dma_contiguous_reserve(0);
|
||||
|
||||
|
@ -688,10 +688,19 @@ void __init early_trap_init(void)
|
||||
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
|
||||
/* int3 can be called from all */
|
||||
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
|
||||
#ifdef CONFIG_X86_32
|
||||
set_intr_gate(X86_TRAP_PF, &page_fault);
|
||||
#endif
|
||||
load_idt(&idt_descr);
|
||||
}
|
||||
|
||||
void __init early_trap_pf_init(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
set_intr_gate(X86_TRAP_PF, &page_fault);
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init trap_init(void)
|
||||
{
|
||||
int i;
|
||||
|
@ -62,10 +62,6 @@ struct x86_init_ops x86_init __initdata = {
|
||||
.banner = default_banner,
|
||||
},
|
||||
|
||||
.mapping = {
|
||||
.pagetable_reserve = native_pagetable_reserve,
|
||||
},
|
||||
|
||||
.paging = {
|
||||
.pagetable_init = native_pagetable_init,
|
||||
},
|
||||
|
@ -17,9 +17,80 @@
|
||||
#include <asm/proto.h>
|
||||
#include <asm/dma.h> /* for MAX_DMA_PFN */
|
||||
|
||||
unsigned long __initdata pgt_buf_start;
|
||||
unsigned long __meminitdata pgt_buf_end;
|
||||
unsigned long __meminitdata pgt_buf_top;
|
||||
#include "mm_internal.h"
|
||||
|
||||
static unsigned long __initdata pgt_buf_start;
|
||||
static unsigned long __initdata pgt_buf_end;
|
||||
static unsigned long __initdata pgt_buf_top;
|
||||
|
||||
static unsigned long min_pfn_mapped;
|
||||
|
||||
static bool __initdata can_use_brk_pgt = true;
|
||||
|
||||
/*
|
||||
* Pages returned are already directly mapped.
|
||||
*
|
||||
* Changing that is likely to break Xen, see commit:
|
||||
*
|
||||
* 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
|
||||
*
|
||||
* for detailed information.
|
||||
*/
|
||||
__ref void *alloc_low_pages(unsigned int num)
|
||||
{
|
||||
unsigned long pfn;
|
||||
int i;
|
||||
|
||||
if (after_bootmem) {
|
||||
unsigned int order;
|
||||
|
||||
order = get_order((unsigned long)num << PAGE_SHIFT);
|
||||
return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
|
||||
__GFP_ZERO, order);
|
||||
}
|
||||
|
||||
if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
|
||||
unsigned long ret;
|
||||
if (min_pfn_mapped >= max_pfn_mapped)
|
||||
panic("alloc_low_page: ran out of memory");
|
||||
ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
|
||||
max_pfn_mapped << PAGE_SHIFT,
|
||||
PAGE_SIZE * num , PAGE_SIZE);
|
||||
if (!ret)
|
||||
panic("alloc_low_page: can not alloc memory");
|
||||
memblock_reserve(ret, PAGE_SIZE * num);
|
||||
pfn = ret >> PAGE_SHIFT;
|
||||
} else {
|
||||
pfn = pgt_buf_end;
|
||||
pgt_buf_end += num;
|
||||
printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
|
||||
pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
|
||||
}
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
void *adr;
|
||||
|
||||
adr = __va((pfn + i) << PAGE_SHIFT);
|
||||
clear_page(adr);
|
||||
}
|
||||
|
||||
return __va(pfn << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */
|
||||
#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE)
|
||||
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
|
||||
void __init early_alloc_pgt_buf(void)
|
||||
{
|
||||
unsigned long tables = INIT_PGT_BUF_SIZE;
|
||||
phys_addr_t base;
|
||||
|
||||
base = __pa(extend_brk(tables, PAGE_SIZE));
|
||||
|
||||
pgt_buf_start = base >> PAGE_SHIFT;
|
||||
pgt_buf_end = pgt_buf_start;
|
||||
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
int after_bootmem;
|
||||
|
||||
@ -29,74 +100,49 @@ int direct_gbpages
|
||||
#endif
|
||||
;
|
||||
|
||||
static void __init init_gbpages(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
if (direct_gbpages && cpu_has_gbpages)
|
||||
printk(KERN_INFO "Using GB pages for direct mapping\n");
|
||||
else
|
||||
direct_gbpages = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct map_range {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
unsigned page_size_mask;
|
||||
};
|
||||
|
||||
/*
|
||||
* First calculate space needed for kernel direct mapping page tables to cover
|
||||
* mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
|
||||
* pages. Then find enough contiguous space for those page tables.
|
||||
*/
|
||||
static void __init find_early_table_space(struct map_range *mr, int nr_range)
|
||||
static int page_size_mask;
|
||||
|
||||
static void __init probe_page_size_mask(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long puds = 0, pmds = 0, ptes = 0, tables;
|
||||
unsigned long start = 0, good_end;
|
||||
phys_addr_t base;
|
||||
init_gbpages();
|
||||
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
unsigned long range, extra;
|
||||
|
||||
range = mr[i].end - mr[i].start;
|
||||
puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
|
||||
|
||||
if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
|
||||
extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
|
||||
pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
|
||||
} else {
|
||||
pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
|
||||
}
|
||||
|
||||
if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
|
||||
extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
|
||||
#ifdef CONFIG_X86_32
|
||||
extra += PMD_SIZE;
|
||||
#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
|
||||
/*
|
||||
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
||||
* This will simplify cpa(), which otherwise needs to support splitting
|
||||
* large pages into small in interrupt context, etc.
|
||||
*/
|
||||
if (direct_gbpages)
|
||||
page_size_mask |= 1 << PG_LEVEL_1G;
|
||||
if (cpu_has_pse)
|
||||
page_size_mask |= 1 << PG_LEVEL_2M;
|
||||
#endif
|
||||
ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
} else {
|
||||
ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/* Enable PSE if available */
|
||||
if (cpu_has_pse)
|
||||
set_in_cr4(X86_CR4_PSE);
|
||||
|
||||
/* Enable PGE if available */
|
||||
if (cpu_has_pge) {
|
||||
set_in_cr4(X86_CR4_PGE);
|
||||
__supported_pte_mask |= _PAGE_GLOBAL;
|
||||
}
|
||||
|
||||
tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
|
||||
tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
|
||||
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* for fixmap */
|
||||
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
|
||||
#endif
|
||||
good_end = max_pfn_mapped << PAGE_SHIFT;
|
||||
|
||||
base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
|
||||
if (!base)
|
||||
panic("Cannot find space for the kernel page tables");
|
||||
|
||||
pgt_buf_start = base >> PAGE_SHIFT;
|
||||
pgt_buf_end = pgt_buf_start;
|
||||
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
|
||||
|
||||
printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
|
||||
mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
|
||||
(pgt_buf_top << PAGE_SHIFT) - 1);
|
||||
}
|
||||
|
||||
void __init native_pagetable_reserve(u64 start, u64 end)
|
||||
{
|
||||
memblock_reserve(start, end - start);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -122,58 +168,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
|
||||
* This runs before bootmem is initialized and gets pages directly from
|
||||
* the physical memory. To access them they are temporarily mapped.
|
||||
* adjust the page_size_mask for a small range to use a big
* page size instead of a small one if the nearby ranges are RAM too.
|
||||
*/
|
||||
unsigned long __init_refok init_memory_mapping(unsigned long start,
|
||||
unsigned long end)
|
||||
static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
|
||||
int nr_range)
|
||||
{
|
||||
unsigned long page_size_mask = 0;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
unsigned long ret = 0;
|
||||
unsigned long pos;
|
||||
int i;
|
||||
|
||||
struct map_range mr[NR_RANGE_MR];
|
||||
int nr_range, i;
|
||||
int use_pse, use_gbpages;
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
|
||||
!(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
|
||||
unsigned long start = round_down(mr[i].start, PMD_SIZE);
|
||||
unsigned long end = round_up(mr[i].end, PMD_SIZE);
|
||||
|
||||
printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
|
||||
start, end - 1);
|
||||
|
||||
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
|
||||
/*
|
||||
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
||||
* This will simplify cpa(), which otherwise needs to support splitting
|
||||
* large pages into small in interrupt context, etc.
|
||||
*/
|
||||
use_pse = use_gbpages = 0;
|
||||
#else
|
||||
use_pse = cpu_has_pse;
|
||||
use_gbpages = direct_gbpages;
|
||||
#ifdef CONFIG_X86_32
|
||||
if ((end >> PAGE_SHIFT) > max_low_pfn)
|
||||
continue;
|
||||
#endif
|
||||
|
||||
/* Enable PSE if available */
|
||||
if (cpu_has_pse)
|
||||
set_in_cr4(X86_CR4_PSE);
|
||||
if (memblock_is_region_memory(start, end - start))
|
||||
mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
|
||||
}
|
||||
if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
|
||||
!(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
|
||||
unsigned long start = round_down(mr[i].start, PUD_SIZE);
|
||||
unsigned long end = round_up(mr[i].end, PUD_SIZE);
|
||||
|
||||
/* Enable PGE if available */
|
||||
if (cpu_has_pge) {
|
||||
set_in_cr4(X86_CR4_PGE);
|
||||
__supported_pte_mask |= _PAGE_GLOBAL;
|
||||
if (memblock_is_region_memory(start, end - start))
|
||||
mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (use_gbpages)
|
||||
page_size_mask |= 1 << PG_LEVEL_1G;
|
||||
if (use_pse)
|
||||
page_size_mask |= 1 << PG_LEVEL_2M;
|
||||
static int __meminit split_mem_range(struct map_range *mr, int nr_range,
|
||||
unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
unsigned long start_pfn, end_pfn, limit_pfn;
|
||||
unsigned long pfn;
|
||||
int i;
|
||||
|
||||
memset(mr, 0, sizeof(mr));
|
||||
nr_range = 0;
|
||||
limit_pfn = PFN_DOWN(end);
|
||||
|
||||
/* head if not big page alignment ? */
|
||||
start_pfn = start >> PAGE_SHIFT;
|
||||
pos = start_pfn << PAGE_SHIFT;
|
||||
pfn = start_pfn = PFN_DOWN(start);
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Don't use a large page for the first 2/4MB of memory
|
||||
@ -181,66 +220,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
|
||||
* and overlapping MTRRs into large pages can cause
|
||||
* slowdowns.
|
||||
*/
|
||||
if (pos == 0)
|
||||
end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
|
||||
if (pfn == 0)
|
||||
end_pfn = PFN_DOWN(PMD_SIZE);
|
||||
else
|
||||
end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
||||
<< (PMD_SHIFT - PAGE_SHIFT);
|
||||
end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
|
||||
#else /* CONFIG_X86_64 */
|
||||
end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
||||
<< (PMD_SHIFT - PAGE_SHIFT);
|
||||
end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
|
||||
#endif
|
||||
if (end_pfn > (end >> PAGE_SHIFT))
|
||||
end_pfn = end >> PAGE_SHIFT;
|
||||
if (end_pfn > limit_pfn)
|
||||
end_pfn = limit_pfn;
|
||||
if (start_pfn < end_pfn) {
|
||||
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
||||
pos = end_pfn << PAGE_SHIFT;
|
||||
pfn = end_pfn;
|
||||
}
|
||||
|
||||
/* big page (2M) range */
|
||||
start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
||||
<< (PMD_SHIFT - PAGE_SHIFT);
|
||||
start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
|
||||
#ifdef CONFIG_X86_32
|
||||
end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
||||
end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
|
||||
#else /* CONFIG_X86_64 */
|
||||
end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
||||
<< (PUD_SHIFT - PAGE_SHIFT);
|
||||
if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
|
||||
end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
|
||||
end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
|
||||
if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
|
||||
end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
|
||||
#endif
|
||||
|
||||
if (start_pfn < end_pfn) {
|
||||
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
||||
page_size_mask & (1<<PG_LEVEL_2M));
|
||||
pos = end_pfn << PAGE_SHIFT;
|
||||
pfn = end_pfn;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* big page (1G) range */
|
||||
start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
||||
<< (PUD_SHIFT - PAGE_SHIFT);
|
||||
end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
||||
start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
|
||||
end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
|
||||
if (start_pfn < end_pfn) {
|
||||
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
||||
page_size_mask &
|
||||
((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
|
||||
pos = end_pfn << PAGE_SHIFT;
|
||||
pfn = end_pfn;
|
||||
}
|
||||
|
||||
/* tail is not big page (1G) alignment */
|
||||
start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
||||
<< (PMD_SHIFT - PAGE_SHIFT);
|
||||
end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
||||
start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
|
||||
end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
|
||||
if (start_pfn < end_pfn) {
|
||||
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
||||
page_size_mask & (1<<PG_LEVEL_2M));
|
||||
pos = end_pfn << PAGE_SHIFT;
|
||||
pfn = end_pfn;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* tail is not big page (2M) alignment */
|
||||
start_pfn = pos>>PAGE_SHIFT;
|
||||
end_pfn = end>>PAGE_SHIFT;
|
||||
start_pfn = pfn;
|
||||
end_pfn = limit_pfn;
|
||||
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
||||
|
||||
/* try to merge same page size and continuous */
|
||||
@ -257,59 +290,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
|
||||
nr_range--;
|
||||
}
|
||||
|
||||
if (!after_bootmem)
|
||||
adjust_range_page_size_mask(mr, nr_range);
|
||||
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
|
||||
mr[i].start, mr[i].end - 1,
|
||||
(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
|
||||
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
|
||||
|
||||
/*
|
||||
* Find space for the kernel direct mapping tables.
|
||||
*
|
||||
* Later we should allocate these tables in the local node of the
|
||||
* memory mapped. Unfortunately this is done currently before the
|
||||
* nodes are discovered.
|
||||
*/
|
||||
if (!after_bootmem)
|
||||
find_early_table_space(mr, nr_range);
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
struct range pfn_mapped[E820_X_MAX];
|
||||
int nr_pfn_mapped;
|
||||
|
||||
static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
|
||||
nr_pfn_mapped, start_pfn, end_pfn);
|
||||
nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX);
|
||||
|
||||
max_pfn_mapped = max(max_pfn_mapped, end_pfn);
|
||||
|
||||
if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
|
||||
max_low_pfn_mapped = max(max_low_pfn_mapped,
|
||||
min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
|
||||
}
|
||||
|
||||
bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_pfn_mapped; i++)
|
||||
if ((start_pfn >= pfn_mapped[i].start) &&
|
||||
(end_pfn <= pfn_mapped[i].end))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
|
||||
* This runs before bootmem is initialized and gets pages directly from
|
||||
* the physical memory. To access them they are temporarily mapped.
|
||||
*/
|
||||
unsigned long __init_refok init_memory_mapping(unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
struct map_range mr[NR_RANGE_MR];
|
||||
unsigned long ret = 0;
|
||||
int nr_range, i;
|
||||
|
||||
pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
|
||||
start, end - 1);
|
||||
|
||||
memset(mr, 0, sizeof(mr));
|
||||
nr_range = split_mem_range(mr, 0, start, end);
|
||||
|
||||
for (i = 0; i < nr_range; i++)
|
||||
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
|
||||
mr[i].page_size_mask);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
early_ioremap_page_table_range_init();
|
||||
|
||||
load_cr3(swapper_pg_dir);
|
||||
#endif
|
||||
|
||||
__flush_tlb_all();
|
||||
|
||||
/*
|
||||
* Reserve the kernel pagetable pages we used (pgt_buf_start -
|
||||
* pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
|
||||
* so that they can be reused for other purposes.
|
||||
*
|
||||
* On native it just means calling memblock_reserve, on Xen it also
|
||||
* means marking RW the pagetable pages that we allocated before
|
||||
* but that haven't been used.
|
||||
*
|
||||
* In fact on xen we mark RO the whole range pgt_buf_start -
|
||||
* pgt_buf_top, because we have to make sure that when
|
||||
* init_memory_mapping reaches the pagetable pages area, it maps
|
||||
* RO all the pagetable pages, including the ones that are beyond
|
||||
* pgt_buf_end at that time.
|
||||
*/
|
||||
if (!after_bootmem && pgt_buf_end > pgt_buf_start)
|
||||
x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
|
||||
PFN_PHYS(pgt_buf_end));
|
||||
|
||||
if (!after_bootmem)
|
||||
early_memtest(start, end);
|
||||
add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
|
||||
|
||||
return ret >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* The range may have holes in the middle or at the ends; only the RAM parts will be mapped.
|
||||
*/
|
||||
static unsigned long __init init_range_memory_mapping(
|
||||
unsigned long r_start,
|
||||
unsigned long r_end)
|
||||
{
|
||||
unsigned long start_pfn, end_pfn;
|
||||
unsigned long mapped_ram_size = 0;
|
||||
int i;
|
||||
|
||||
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
|
||||
u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end);
|
||||
u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end);
|
||||
if (start >= end)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* if it overlaps with the brk pgt, we need to
* alloc the pgt buf from memblock instead.
|
||||
*/
|
||||
can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
|
||||
min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
|
||||
init_memory_mapping(start, end);
|
||||
mapped_ram_size += end - start;
|
||||
can_use_brk_pgt = true;
|
||||
}
|
||||
|
||||
return mapped_ram_size;
|
||||
}
|
||||
|
||||
/* (PUD_SHIFT-PMD_SHIFT)/2 */
|
||||
#define STEP_SIZE_SHIFT 5
|
||||
void __init init_mem_mapping(void)
|
||||
{
|
||||
unsigned long end, real_end, start, last_start;
|
||||
unsigned long step_size;
|
||||
unsigned long addr;
|
||||
unsigned long mapped_ram_size = 0;
|
||||
unsigned long new_mapped_ram_size;
|
||||
|
||||
probe_page_size_mask();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
end = max_pfn << PAGE_SHIFT;
|
||||
#else
|
||||
end = max_low_pfn << PAGE_SHIFT;
|
||||
#endif
|
||||
|
||||
/* the ISA range is always mapped regardless of memory holes */
|
||||
init_memory_mapping(0, ISA_END_ADDRESS);
|
||||
|
||||
/* Xen has a big reserved range near the end of RAM; skip it at first */
|
||||
addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE,
|
||||
PAGE_SIZE);
|
||||
real_end = addr + PMD_SIZE;
|
||||
|
||||
/* step_size needs to be small so the pgt_buf from BRK can cover it */
|
||||
step_size = PMD_SIZE;
|
||||
max_pfn_mapped = 0; /* will get exact value next */
|
||||
min_pfn_mapped = real_end >> PAGE_SHIFT;
|
||||
last_start = start = real_end;
|
||||
while (last_start > ISA_END_ADDRESS) {
|
||||
if (last_start > step_size) {
|
||||
start = round_down(last_start - 1, step_size);
|
||||
if (start < ISA_END_ADDRESS)
|
||||
start = ISA_END_ADDRESS;
|
||||
} else
|
||||
start = ISA_END_ADDRESS;
|
||||
new_mapped_ram_size = init_range_memory_mapping(start,
|
||||
last_start);
|
||||
last_start = start;
|
||||
min_pfn_mapped = last_start >> PAGE_SHIFT;
|
||||
/* only increase step_size after a big range gets mapped */
|
||||
if (new_mapped_ram_size > mapped_ram_size)
|
||||
step_size <<= STEP_SIZE_SHIFT;
|
||||
mapped_ram_size += new_mapped_ram_size;
|
||||
}
|
||||
|
||||
if (real_end < end)
|
||||
init_range_memory_mapping(real_end, end);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (max_pfn > max_low_pfn) {
|
||||
/* can we preserve max_low_pfn? */
|
||||
max_low_pfn = max_pfn;
|
||||
}
|
||||
#else
|
||||
early_ioremap_page_table_range_init();
|
||||
#endif
|
||||
|
||||
load_cr3(swapper_pg_dir);
|
||||
__flush_tlb_all();
|
||||
|
||||
early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
|
||||
|
@ -53,25 +53,14 @@
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/init.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
unsigned long highstart_pfn, highend_pfn;
|
||||
|
||||
static noinline int do_test_wp_bit(void);
|
||||
|
||||
bool __read_mostly __vmalloc_start_set = false;
|
||||
|
||||
static __init void *alloc_low_page(void)
|
||||
{
|
||||
unsigned long pfn = pgt_buf_end++;
|
||||
void *adr;
|
||||
|
||||
if (pfn >= pgt_buf_top)
|
||||
panic("alloc_low_page: ran out of memory");
|
||||
|
||||
adr = __va(pfn * PAGE_SIZE);
|
||||
clear_page(adr);
|
||||
return adr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Creates a middle page table and puts a pointer to it in the
|
||||
* given global directory entry. This only returns the gd entry
|
||||
@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
|
||||
if (after_bootmem)
|
||||
pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
|
||||
else
|
||||
pmd_table = (pmd_t *)alloc_low_page();
|
||||
pmd_table = (pmd_t *)alloc_low_page();
|
||||
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
|
||||
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
||||
pud = pud_offset(pgd, 0);
|
||||
@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
|
||||
static pte_t * __init one_page_table_init(pmd_t *pmd)
|
||||
{
|
||||
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
|
||||
pte_t *page_table = NULL;
|
||||
|
||||
if (after_bootmem) {
|
||||
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
|
||||
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
|
||||
#endif
|
||||
if (!page_table)
|
||||
page_table =
|
||||
(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
|
||||
} else
|
||||
page_table = (pte_t *)alloc_low_page();
|
||||
pte_t *page_table = (pte_t *)alloc_low_page();
|
||||
|
||||
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
|
||||
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
|
||||
@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr)
|
||||
return one_page_table_init(pmd) + pte_idx;
|
||||
}
|
||||
|
||||
static unsigned long __init
|
||||
page_table_range_init_count(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long count = 0;
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
|
||||
int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
|
||||
int pgd_idx, pmd_idx;
|
||||
unsigned long vaddr;
|
||||
|
||||
if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
|
||||
return 0;
|
||||
|
||||
vaddr = start;
|
||||
pgd_idx = pgd_index(vaddr);
|
||||
|
||||
for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
|
||||
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
|
||||
pmd_idx++) {
|
||||
if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
|
||||
(vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
|
||||
count++;
|
||||
vaddr += PMD_SIZE;
|
||||
}
|
||||
pmd_idx = 0;
|
||||
}
|
||||
#endif
|
||||
return count;
|
||||
}
|
||||
|
||||
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
|
||||
unsigned long vaddr, pte_t *lastpte)
|
||||
unsigned long vaddr, pte_t *lastpte,
|
||||
void **adr)
|
||||
{
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
/*
|
||||
@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
|
||||
|
||||
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
|
||||
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
|
||||
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
|
||||
&& ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
|
||||
|| (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
|
||||
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
|
||||
pte_t *newpte;
|
||||
int i;
|
||||
|
||||
BUG_ON(after_bootmem);
|
||||
newpte = alloc_low_page();
|
||||
newpte = *adr;
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
set_pte(newpte + i, pte[i]);
|
||||
*adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
|
||||
|
||||
paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
|
||||
set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
|
||||
@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
|
||||
pgd_t *pgd;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte = NULL;
|
||||
unsigned long count = page_table_range_init_count(start, end);
|
||||
void *adr = NULL;
|
||||
|
||||
if (count)
|
||||
adr = alloc_low_pages(count);
|
||||
|
||||
vaddr = start;
|
||||
pgd_idx = pgd_index(vaddr);
|
||||
@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
|
||||
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
|
||||
pmd++, pmd_idx++) {
|
||||
pte = page_table_kmap_check(one_page_table_init(pmd),
|
||||
pmd, vaddr, pte);
|
||||
pmd, vaddr, pte, &adr);
|
||||
|
||||
vaddr += PMD_SIZE;
|
||||
}
|
||||
@ -310,6 +321,7 @@ repeat:
|
||||
__pgprot(PTE_IDENT_ATTR |
|
||||
_PAGE_PSE);
|
||||
|
||||
pfn &= PMD_MASK >> PAGE_SHIFT;
|
||||
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
|
||||
PAGE_OFFSET + PAGE_SIZE-1;
|
||||
|
||||
@ -455,9 +467,14 @@ void __init native_pagetable_init(void)
|
||||
|
||||
/*
|
||||
* Remove any mappings which extend past the end of physical
|
||||
* memory from the boot time page table:
|
||||
* memory from the boot time page table.
|
||||
* In virtual address space, we should have at least two pages
|
||||
* from VMALLOC_END to pkmap or fixmap according to VMALLOC_END
|
||||
* definition. And max_low_pfn is set to VMALLOC_END physical
|
||||
* address. If the initial memory mapping did its job right, we
* should see PTEs in use near max_low_pfn, or a PMD that is not present.
|
||||
*/
|
||||
for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
|
||||
for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
|
||||
va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
|
||||
pgd = base + pgd_index(va);
|
||||
if (!pgd_present(*pgd))
|
||||
@ -468,10 +485,19 @@ void __init native_pagetable_init(void)
|
||||
if (!pmd_present(*pmd))
|
||||
break;
|
||||
|
||||
/* should not be large page here */
|
||||
if (pmd_large(*pmd)) {
|
||||
pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
|
||||
pfn, pmd, __pa(pmd));
|
||||
BUG_ON(1);
|
||||
}
|
||||
|
||||
pte = pte_offset_kernel(pmd, va);
|
||||
if (!pte_present(*pte))
|
||||
break;
|
||||
|
||||
printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
|
||||
pfn, pmd, __pa(pmd), pte, __pa(pte));
|
||||
pte_clear(NULL, va, pte);
|
||||
}
|
||||
paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
|
||||
@ -550,7 +576,7 @@ early_param("highmem", parse_highmem);
|
||||
* artificially via the highmem=x boot parameter then create
|
||||
* it:
|
||||
*/
|
||||
void __init lowmem_pfn_init(void)
|
||||
static void __init lowmem_pfn_init(void)
|
||||
{
|
||||
/* max_low_pfn is 0, we already have early_res support */
|
||||
max_low_pfn = max_pfn;
|
||||
@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void)
|
||||
* We have more RAM than fits into lowmem - we try to put it into
|
||||
* highmem, also taking the highmem=x boot parameter into account:
|
||||
*/
|
||||
void __init highmem_pfn_init(void)
|
||||
static void __init highmem_pfn_init(void)
|
||||
{
|
||||
max_low_pfn = MAXMEM_PFN;
|
||||
|
||||
@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void)
|
||||
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
|
||||
max_pfn_mapped<<PAGE_SHIFT);
|
||||
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
|
||||
|
||||
after_bootmem = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -753,6 +777,8 @@ void __init mem_init(void)
|
||||
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
|
||||
reservedpages++;
|
||||
|
||||
after_bootmem = 1;
|
||||
|
||||
codesize = (unsigned long) &_etext - (unsigned long) &_text;
|
||||
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
|
||||
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
|
||||
|
@ -54,6 +54,82 @@
|
||||
#include <asm/uv/uv.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
addr &= PMD_MASK;
|
||||
for (; addr < end; addr += PMD_SIZE) {
|
||||
pmd_t *pmd = pmd_page + pmd_index(addr);
|
||||
|
||||
if (!pmd_present(*pmd))
|
||||
set_pmd(pmd, __pmd(addr | pmd_flag));
|
||||
}
|
||||
}
|
||||
static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
unsigned long next;
|
||||
|
||||
for (; addr < end; addr = next) {
|
||||
pud_t *pud = pud_page + pud_index(addr);
|
||||
pmd_t *pmd;
|
||||
|
||||
next = (addr & PUD_MASK) + PUD_SIZE;
|
||||
if (next > end)
|
||||
next = end;
|
||||
|
||||
if (pud_present(*pud)) {
|
||||
pmd = pmd_offset(pud, 0);
|
||||
ident_pmd_init(info->pmd_flag, pmd, addr, next);
|
||||
continue;
|
||||
}
|
||||
pmd = (pmd_t *)info->alloc_pgt_page(info->context);
|
||||
if (!pmd)
|
||||
return -ENOMEM;
|
||||
ident_pmd_init(info->pmd_flag, pmd, addr, next);
|
||||
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
unsigned long next;
|
||||
int result;
|
||||
int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
|
||||
|
||||
for (; addr < end; addr = next) {
|
||||
pgd_t *pgd = pgd_page + pgd_index(addr) + off;
|
||||
pud_t *pud;
|
||||
|
||||
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
|
||||
if (next > end)
|
||||
next = end;
|
||||
|
||||
if (pgd_present(*pgd)) {
|
||||
pud = pud_offset(pgd, 0);
|
||||
result = ident_pud_init(info, pud, addr, next);
|
||||
if (result)
|
||||
return result;
|
||||
continue;
|
||||
}
|
||||
|
||||
pud = (pud_t *)info->alloc_pgt_page(info->context);
|
||||
if (!pud)
|
||||
return -ENOMEM;
|
||||
result = ident_pud_init(info, pud, addr, next);
|
||||
if (result)
|
||||
return result;
|
||||
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init parse_direct_gbpages_off(char *arg)
|
||||
{
|
||||
direct_gbpages = 0;
|
||||
@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
|
||||
void __init cleanup_highmap(void)
|
||||
{
|
||||
unsigned long vaddr = __START_KERNEL_map;
|
||||
unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
|
||||
unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
|
||||
unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
|
||||
pmd_t *pmd = level2_kernel_pgt;
|
||||
|
||||
/*
|
||||
* Native path, max_pfn_mapped is not set yet.
|
||||
* Xen has valid max_pfn_mapped set in
|
||||
* arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
|
||||
*/
|
||||
if (max_pfn_mapped)
|
||||
vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
|
||||
|
||||
for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
|
||||
if (pmd_none(*pmd))
|
||||
continue;
|
||||
@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
|
||||
}
|
||||
}
|
||||
|
||||
static __ref void *alloc_low_page(unsigned long *phys)
|
||||
{
|
||||
unsigned long pfn = pgt_buf_end++;
|
||||
void *adr;
|
||||
|
||||
if (after_bootmem) {
|
||||
adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
|
||||
*phys = __pa(adr);
|
||||
|
||||
return adr;
|
||||
}
|
||||
|
||||
if (pfn >= pgt_buf_top)
|
||||
panic("alloc_low_page: ran out of memory");
|
||||
|
||||
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
|
||||
clear_page(adr);
|
||||
*phys = pfn * PAGE_SIZE;
|
||||
return adr;
|
||||
}
|
||||
|
||||
static __ref void *map_low_page(void *virt)
|
||||
{
|
||||
void *adr;
|
||||
unsigned long phys, left;
|
||||
|
||||
if (after_bootmem)
|
||||
return virt;
|
||||
|
||||
phys = __pa(virt);
|
||||
left = phys & (PAGE_SIZE - 1);
|
||||
adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
|
||||
adr = (void *)(((unsigned long)adr) | left);
|
||||
|
||||
return adr;
|
||||
}
|
||||
|
||||
static __ref void unmap_low_page(void *adr)
|
||||
{
|
||||
if (after_bootmem)
|
||||
return;
|
||||
|
||||
early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
|
||||
}
|
||||
|
||||
static unsigned long __meminit
|
||||
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
|
||||
pgprot_t prot)
|
||||
{
|
||||
unsigned pages = 0;
|
||||
unsigned long pages = 0, next;
|
||||
unsigned long last_map_addr = end;
|
||||
int i;
|
||||
|
||||
pte_t *pte = pte_page + pte_index(addr);
|
||||
|
||||
for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
|
||||
|
||||
for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
|
||||
next = (addr & PAGE_MASK) + PAGE_SIZE;
|
||||
if (addr >= end) {
|
||||
if (!after_bootmem) {
|
||||
for(; i < PTRS_PER_PTE; i++, pte++)
|
||||
set_pte(pte, __pte(0));
|
||||
}
|
||||
break;
|
||||
if (!after_bootmem &&
|
||||
!e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
|
||||
!e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
|
||||
set_pte(pte, __pte(0));
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
int i = pmd_index(address);
|
||||
|
||||
for (; i < PTRS_PER_PMD; i++, address = next) {
|
||||
unsigned long pte_phys;
|
||||
pmd_t *pmd = pmd_page + pmd_index(address);
|
||||
pte_t *pte;
|
||||
pgprot_t new_prot = prot;
|
||||
|
||||
if (address >= end) {
|
||||
if (!after_bootmem) {
|
||||
for (; i < PTRS_PER_PMD; i++, pmd++)
|
||||
set_pmd(pmd, __pmd(0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
next = (address & PMD_MASK) + PMD_SIZE;
|
||||
if (address >= end) {
|
||||
if (!after_bootmem &&
|
||||
!e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
|
||||
!e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
|
||||
set_pmd(pmd, __pmd(0));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pmd_val(*pmd)) {
|
||||
if (!pmd_large(*pmd)) {
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
|
||||
pte = (pte_t *)pmd_page_vaddr(*pmd);
|
||||
last_map_addr = phys_pte_init(pte, address,
|
||||
end, prot);
|
||||
unmap_low_page(pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
continue;
|
||||
}
|
||||
@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
||||
pages++;
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
set_pte((pte_t *)pmd,
|
||||
pfn_pte(address >> PAGE_SHIFT,
|
||||
pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
|
||||
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
last_map_addr = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
pte = alloc_low_page(&pte_phys);
|
||||
pte = alloc_low_page();
|
||||
last_map_addr = phys_pte_init(pte, address, end, new_prot);
|
||||
unmap_low_page(pte);
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
|
||||
pmd_populate_kernel(&init_mm, pmd, pte);
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
}
|
||||
update_page_count(PG_LEVEL_2M, pages);
|
||||
@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
|
||||
int i = pud_index(addr);
|
||||
|
||||
for (; i < PTRS_PER_PUD; i++, addr = next) {
|
||||
unsigned long pmd_phys;
|
||||
pud_t *pud = pud_page + pud_index(addr);
|
||||
pmd_t *pmd;
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
if (addr >= end)
|
||||
break;
|
||||
|
||||
next = (addr & PUD_MASK) + PUD_SIZE;
|
||||
|
||||
if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
|
||||
set_pud(pud, __pud(0));
|
||||
if (addr >= end) {
|
||||
if (!after_bootmem &&
|
||||
!e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
|
||||
!e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
|
||||
set_pud(pud, __pud(0));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pud_val(*pud)) {
|
||||
if (!pud_large(*pud)) {
|
||||
pmd = map_low_page(pmd_offset(pud, 0));
|
||||
pmd = pmd_offset(pud, 0);
|
||||
last_map_addr = phys_pmd_init(pmd, addr, end,
|
||||
page_size_mask, prot);
|
||||
unmap_low_page(pmd);
|
||||
__flush_tlb_all();
|
||||
continue;
|
||||
}
|
||||
@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
            pages++;
            spin_lock(&init_mm.page_table_lock);
            set_pte((pte_t *)pud,
                pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
                pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
                    PAGE_KERNEL_LARGE));
            spin_unlock(&init_mm.page_table_lock);
            last_map_addr = next;
            continue;
        }

        pmd = alloc_low_page(&pmd_phys);
        pmd = alloc_low_page();
        last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
                                      prot);
        unmap_low_page(pmd);

        spin_lock(&init_mm.page_table_lock);
        pud_populate(&init_mm, pud, __va(pmd_phys));
        pud_populate(&init_mm, pud, pmd);
        spin_unlock(&init_mm.page_table_lock);
    }
    __flush_tlb_all();

@ -578,28 +610,23 @@ kernel_physical_mapping_init(unsigned long start,

    for (; start < end; start = next) {
        pgd_t *pgd = pgd_offset_k(start);
        unsigned long pud_phys;
        pud_t *pud;

        next = (start + PGDIR_SIZE) & PGDIR_MASK;
        if (next > end)
            next = end;
        next = (start & PGDIR_MASK) + PGDIR_SIZE;

        if (pgd_val(*pgd)) {
            pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
            pud = (pud_t *)pgd_page_vaddr(*pgd);
            last_map_addr = phys_pud_init(pud, __pa(start),
                                          __pa(end), page_size_mask);
            unmap_low_page(pud);
            continue;
        }

        pud = alloc_low_page(&pud_phys);
        last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
        pud = alloc_low_page();
        last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
                                      page_size_mask);
        unmap_low_page(pud);

        spin_lock(&init_mm.page_table_lock);
        pgd_populate(&init_mm, pgd, __va(pud_phys));
        pgd_populate(&init_mm, pgd, pud);
        spin_unlock(&init_mm.page_table_lock);
        pgd_changed = true;
    }

@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
    struct pglist_data *pgdat = NODE_DATA(nid);
    struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
    unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
    unsigned long start_pfn = start >> PAGE_SHIFT;
    unsigned long nr_pages = size >> PAGE_SHIFT;
    int ret;

    last_mapped_pfn = init_memory_mapping(start, start + size);
    if (last_mapped_pfn > max_pfn_mapped)
        max_pfn_mapped = last_mapped_pfn;
    init_memory_mapping(start, start + size);

    ret = __add_pages(nid, zone, start_pfn, nr_pages);
    WARN_ON_ONCE(ret);

@ -686,6 +711,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory);

static struct kcore_list kcore_vsyscall;

static void __init register_page_bootmem_info(void)
{
#ifdef CONFIG_NUMA
    int i;

    for_each_online_node(i)
        register_page_bootmem_info_node(NODE_DATA(i));
#endif
}

void __init mem_init(void)
{
    long codesize, reservedpages, datasize, initsize;

@ -698,11 +733,8 @@ void __init mem_init(void)
    reservedpages = 0;

    /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
    totalram_pages = numa_free_all_bootmem();
#else
    register_page_bootmem_info();
    totalram_pages = free_all_bootmem();
#endif

    absent_pages = absent_pages_in_range(0, max_pfn);
    reservedpages = max_pfn - totalram_pages - absent_pages;

@ -776,6 +808,7 @@ void mark_rodata_ro(void)
    unsigned long end = (unsigned long) &__end_rodata_hpage_align;
    unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
    unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
    unsigned long all_end = PFN_ALIGN(&_end);

    printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
           (end - start) >> 10);

@ -784,10 +817,10 @@ void mark_rodata_ro(void)
    kernel_set_to_readonly = 1;

    /*
     * The rodata section (but not the kernel text!) should also be
     * not-executable.
     * The rodata/data/bss/brk section (but not the kernel text!)
     * should also be not-executable.
     */
    set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
    set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);

    rodata_test();

19  arch/x86/mm/mm_internal.h  Normal file
@ -0,0 +1,19 @@
#ifndef __X86_MM_INTERNAL_H
#define __X86_MM_INTERNAL_H

void *alloc_low_pages(unsigned int num);
static inline void *alloc_low_page(void)
{
    return alloc_low_pages(1);
}

void early_ioremap_page_table_range_init(void);

unsigned long kernel_physical_mapping_init(unsigned long start,
                                           unsigned long end,
                                           unsigned long page_size_mask);
void zone_sizes_init(void);

extern int after_bootmem;

#endif /* __X86_MM_INTERNAL_H */
@ -10,16 +10,3 @@ void __init initmem_init(void)
{
    x86_numa_init();
}

unsigned long __init numa_free_all_bootmem(void)
{
    unsigned long pages = 0;
    int i;

    for_each_online_node(i)
        pages += free_all_bootmem_node(NODE_DATA(i));

    pages += free_low_memory_core_early(MAX_NUMNODES);

    return pages;
}

@ -579,16 +579,10 @@ static int split_large_page(pte_t *kpte, unsigned long address)
    for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
        set_pte(&pbase[i], pfn_pte(pfn, ref_prot));

    if (address >= (unsigned long)__va(0) &&
        address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
    if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
                            PFN_DOWN(__pa(address)) + 1))
        split_page_count(level);

#ifdef CONFIG_X86_64
    if (address >= (unsigned long)__va(1UL<<32) &&
        address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
        split_page_count(level);
#endif

    /*
     * Install the new, split up pagetable.
     *

@ -757,13 +751,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
    unsigned long vaddr;
    int ret;

    if (cpa->pfn >= max_pfn_mapped)
    if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
        return 0;

#ifdef CONFIG_X86_64
    if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
        return 0;
#endif
    /*
     * No need to redo, when the primary call touched the direct
     * mapping already:

@ -835,7 +835,7 @@ void __init efi_enter_virtual_mode(void)
    efi_memory_desc_t *md, *prev_md = NULL;
    efi_status_t status;
    unsigned long size;
    u64 end, systab, end_pfn;
    u64 end, systab, start_pfn, end_pfn;
    void *p, *va, *new_memmap = NULL;
    int count = 0;

@ -888,10 +888,9 @@ void __init efi_enter_virtual_mode(void)
        size = md->num_pages << EFI_PAGE_SHIFT;
        end = md->phys_addr + size;

        start_pfn = PFN_DOWN(md->phys_addr);
        end_pfn = PFN_UP(end);
        if (end_pfn <= max_low_pfn_mapped
            || (end_pfn > (1UL << (32 - PAGE_SHIFT))
                && end_pfn <= max_pfn_mapped)) {
        if (pfn_range_is_mapped(start_pfn, end_pfn)) {
            va = __va(md->phys_addr);

            if (!(md->attribute & EFI_MEMORY_WB))

@ -11,6 +11,8 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/suspend.h>

#include <asm/init.h>
#include <asm/proto.h>
#include <asm/page.h>
#include <asm/pgtable.h>

@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt;

void *relocated_restore_code;

static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
static void *alloc_pgt_page(void *context)
{
    long i, j;

    i = pud_index(address);
    pud = pud + i;
    for (; i < PTRS_PER_PUD; pud++, i++) {
        unsigned long paddr;
        pmd_t *pmd;

        paddr = address + i*PUD_SIZE;
        if (paddr >= end)
            break;

        pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
        if (!pmd)
            return -ENOMEM;
        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
        for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
            unsigned long pe;

            if (paddr >= end)
                break;
            pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
            pe &= __supported_pte_mask;
            set_pmd(pmd, __pmd(pe));
        }
    }
    return 0;
    return (void *)get_safe_page(GFP_ATOMIC);
}

static int set_up_temporary_mappings(void)
{
    unsigned long start, end, next;
    int error;
    struct x86_mapping_info info = {
        .alloc_pgt_page = alloc_pgt_page,
        .pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
        .kernel_mapping = true,
    };
    unsigned long mstart, mend;
    int result;
    int i;

    temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
    if (!temp_level4_pgt)

@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void)
        init_level4_pgt[pgd_index(__START_KERNEL_map)]);

    /* Set up the direct mapping from scratch */
    start = (unsigned long)pfn_to_kaddr(0);
    end = (unsigned long)pfn_to_kaddr(max_pfn);
    for (i = 0; i < nr_pfn_mapped; i++) {
        mstart = pfn_mapped[i].start << PAGE_SHIFT;
        mend = pfn_mapped[i].end << PAGE_SHIFT;

    for (; start < end; start = next) {
        pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
        if (!pud)
            return -ENOMEM;
        next = start + PGDIR_SIZE;
        if (next > end)
            next = end;
        if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
            return error;
        set_pgd(temp_level4_pgt + pgd_index(start),
            mk_kernel_pgd(__pa(pud)));
        result = kernel_ident_mapping_init(&info, temp_level4_pgt,
                                           mstart, mend);

        if (result)
            return result;
    }

    return 0;
}

@ -8,9 +8,26 @@
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;

void __init setup_real_mode(void)
void __init reserve_real_mode(void)
{
    phys_addr_t mem;
    unsigned char *base;
    size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);

    /* Has to be under 1M so we can execute real-mode AP code. */
    mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
    if (!mem)
        panic("Cannot allocate trampoline\n");

    base = __va(mem);
    memblock_reserve(mem, size);
    real_mode_header = (struct real_mode_header *) base;
    printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
           base, (unsigned long long)mem, size);
}

void __init setup_real_mode(void)
{
    u16 real_mode_seg;
    u32 *rel;
    u32 count;

@ -25,16 +42,7 @@ void __init setup_real_mode(void)
    u64 efer;
#endif

    /* Has to be in very low memory so we can execute real-mode AP code. */
    mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
    if (!mem)
        panic("Cannot allocate trampoline\n");

    base = __va(mem);
    memblock_reserve(mem, size);
    real_mode_header = (struct real_mode_header *) base;
    printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
           base, (unsigned long long)mem, size);
    base = (unsigned char *)real_mode_header;

    memcpy(base, real_mode_blob, size);

@ -78,16 +86,18 @@ void __init setup_real_mode(void)
    *trampoline_cr4_features = read_cr4();

    trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
    trampoline_pgd[0] = __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE;
    trampoline_pgd[511] = __pa_symbol(level3_kernel_pgt) + _KERNPG_TABLE;
    trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
    trampoline_pgd[511] = init_level4_pgt[511].pgd;
#endif
}

/*
 * set_real_mode_permissions() gets called very early, to guarantee the
 * availability of low memory. This is before the proper kernel page
 * reserve_real_mode() gets called very early, to guarantee the
 * availability of low memory. This is before the proper kernel page
 * tables are set up, so we cannot set page permissions in that
 * function. Thus, we use an arch_initcall instead.
 * function. Also trampoline code will be executed by APs so we
 * need to mark it executable at do_pre_smp_initcalls() at least,
 * thus run it as a early_initcall().
 */
static int __init set_real_mode_permissions(void)
{

@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void)

    return 0;
}

arch_initcall(set_real_mode_permissions);
early_initcall(set_real_mode_permissions);

@ -814,12 +814,14 @@ int main(int argc, char **argv)
    read_relocs(fp);
    if (show_absolute_syms) {
        print_absolute_symbols();
        return 0;
        goto out;
    }
    if (show_absolute_relocs) {
        print_absolute_relocs();
        return 0;
        goto out;
    }
    emit_relocs(as_text, use_real_mode);
out:
    fclose(fp);
    return 0;
}

@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm)

static void xen_post_allocator_init(void);

static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
{
    /* reserve the range used */
    native_pagetable_reserve(start, end);

    /* set as RW the rest */
    printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
           PFN_PHYS(pgt_buf_top));
    while (end < PFN_PHYS(pgt_buf_top)) {
        make_lowmem_page_readwrite(__va(end));
        end += PAGE_SIZE;
    }
}

#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
                                    unsigned long vaddr_end)

@ -1503,19 +1489,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
#else /* CONFIG_X86_64 */
static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
{
    unsigned long pfn = pte_pfn(pte);

    /*
     * If the new pfn is within the range of the newly allocated
     * kernel pagetable, and it isn't being mapped into an
     * early_ioremap fixmap slot as a freshly allocated page, make sure
     * it is RO.
     */
    if (((!is_early_ioremap_ptep(ptep) &&
          pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
        (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
        pte = pte_wrprotect(pte);

    return pte;
}
#endif /* CONFIG_X86_64 */

@ -2197,7 +2170,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {

void __init xen_init_mmu_ops(void)
{
    x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
    x86_init.paging.pagetable_init = xen_pagetable_init;
    pv_mmu_ops = xen_mmu_ops;

@ -231,7 +231,9 @@ retry:
    }
    start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
    if (early) {
        swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
        if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
                                  verbose))
            panic("Cannot allocate SWIOTLB buffer");
        rc = 0;
    } else
        rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);

@ -99,6 +99,9 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
extern void *__alloc_bootmem_low(unsigned long size,
                                 unsigned long align,
                                 unsigned long goal);
void *__alloc_bootmem_low_nopanic(unsigned long size,
                                  unsigned long align,
                                  unsigned long goal);
extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
                                      unsigned long size,
                                      unsigned long align,

@ -132,6 +135,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,

#define alloc_bootmem_low(x) \
    __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_low_pages_nopanic(x) \
    __alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages(x) \
    __alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages_node(pgdat, x) \

@ -191,6 +191,7 @@ extern struct kimage *kexec_crash_image;
/* Location of a reserved region to hold the crash kernel.
 */
extern struct resource crashk_res;
extern struct resource crashk_low_res;
typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
extern note_buf_t __percpu *crash_notes;
extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];

@ -199,6 +200,8 @@ extern size_t vmcoreinfo_max_size;

int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
        unsigned long long *crash_size, unsigned long long *crash_base);
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
        unsigned long long *crash_size, unsigned long long *crash_base);
int crash_shrink_memory(unsigned long new_size);
size_t crash_get_memory_size(void);
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);

@ -155,6 +155,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
                                  phys_addr_t max_addr);
phys_addr_t memblock_phys_mem_size(void);
phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);

@ -1386,7 +1386,6 @@ extern void __init mmap_init(void);
extern void show_mem(unsigned int flags);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
extern int after_bootmem;

extern __printf(3, 4)
void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);

@ -23,7 +23,7 @@ extern int swiotlb_force;
#define IO_TLB_SHIFT 11

extern void swiotlb_init(int verbose);
extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
extern unsigned long swiotlb_nr_tbl(void);
extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);

@ -54,6 +54,12 @@ struct resource crashk_res = {
    .end = 0,
    .flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
    .name = "Crash kernel low",
    .start = 0,
    .end = 0,
    .flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{

@ -1369,10 +1375,11 @@ static int __init parse_crashkernel_simple(char *cmdline,
 * That function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
static int __init __parse_crashkernel(char *cmdline,
                             unsigned long long system_ram,
                             unsigned long long *crash_size,
                             unsigned long long *crash_base)
                             unsigned long long *crash_base,
                             const char *name)
{
    char *p = cmdline, *ck_cmdline = NULL;
    char *first_colon, *first_space;

@ -1382,16 +1389,16 @@ int __init parse_crashkernel(char *cmdline,
    *crash_base = 0;

    /* find crashkernel and use the last one if there are more */
    p = strstr(p, "crashkernel=");
    p = strstr(p, name);
    while (p) {
        ck_cmdline = p;
        p = strstr(p+1, "crashkernel=");
        p = strstr(p+1, name);
    }

    if (!ck_cmdline)
        return -EINVAL;

    ck_cmdline += 12; /* strlen("crashkernel=") */
    ck_cmdline += strlen(name);

    /*
     * if the commandline contains a ':', then that's the extended

@ -1409,6 +1416,23 @@ int __init parse_crashkernel(char *cmdline,
    return 0;
}

int __init parse_crashkernel(char *cmdline,
                             unsigned long long system_ram,
                             unsigned long long *crash_size,
                             unsigned long long *crash_base)
{
    return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
                               "crashkernel=");
}

int __init parse_crashkernel_low(char *cmdline,
                             unsigned long long system_ram,
                             unsigned long long *crash_size,
                             unsigned long long *crash_base)
{
    return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
                               "crashkernel_low=");
}

static void update_vmcoreinfo_note(void)
{

@ -122,11 +122,18 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
    return phys_to_dma(hwdev, virt_to_phys(address));
}

static bool no_iotlb_memory;

void swiotlb_print_info(void)
{
    unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
    unsigned char *vstart, *vend;

    if (no_iotlb_memory) {
        pr_warn("software IO TLB: No low mem\n");
        return;
    }

    vstart = phys_to_virt(io_tlb_start);
    vend = phys_to_virt(io_tlb_end);

@ -136,7 +143,7 @@ void swiotlb_print_info(void)
           bytes >> 20, vstart, vend - 1);
}

void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
    void *v_overflow_buffer;
    unsigned long i, bytes;

@ -150,9 +157,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
    /*
     * Get the overflow emergency buffer
     */
    v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
    v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
                            PAGE_ALIGN(io_tlb_overflow));
    if (!v_overflow_buffer)
        panic("Cannot allocate SWIOTLB overflow buffer!\n");
        return -ENOMEM;

    io_tlb_overflow_buffer = __pa(v_overflow_buffer);

@ -169,15 +177,19 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)

    if (verbose)
        swiotlb_print_info();

    return 0;
}

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
static void __init
swiotlb_init_with_default_size(size_t default_size, int verbose)
void __init
swiotlb_init(int verbose)
{
    /* default to 64MB */
    size_t default_size = 64UL<<20;
    unsigned char *vstart;
    unsigned long bytes;

@ -188,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)

    bytes = io_tlb_nslabs << IO_TLB_SHIFT;

    /*
     * Get IO TLB memory from the low pages
     */
    vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
    if (!vstart)
        panic("Cannot allocate SWIOTLB buffer");
    /* Get IO TLB memory from the low pages */
    vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
    if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
        return;

    swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose);
}

void __init
swiotlb_init(int verbose)
{
    swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */
    if (io_tlb_start)
        free_bootmem(io_tlb_start,
                     PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
    pr_warn("Cannot allocate SWIOTLB buffer");
    no_iotlb_memory = true;
}

/*

@ -405,6 +413,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
    unsigned long offset_slots;
    unsigned long max_slots;

    if (no_iotlb_memory)
        panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");

    mask = dma_get_seg_boundary(hwdev);

    tbl_dma_addr &= mask;

@ -833,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
    return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
}

void * __init __alloc_bootmem_low_nopanic(unsigned long size,
                                          unsigned long align,
                                          unsigned long goal)
{
    return ___alloc_bootmem_nopanic(size, align, goal,
                                    ARCH_LOW_ADDRESS_LIMIT);
}

/**
 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
 * @pgdat: node to allocate from

@ -828,6 +828,23 @@ phys_addr_t __init memblock_phys_mem_size(void)
    return memblock.memory.total_size;
}

phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
{
    unsigned long pages = 0;
    struct memblock_region *r;
    unsigned long start_pfn, end_pfn;

    for_each_memblock(memory, r) {
        start_pfn = memblock_region_memory_base_pfn(r);
        end_pfn = memblock_region_memory_end_pfn(r);
        start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
        end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
        pages += end_pfn - start_pfn;
    }

    return (phys_addr_t)pages << PAGE_SHIFT;
}

/* lowest address */
phys_addr_t __init_memblock memblock_start_of_DRAM(void)
{

@ -153,21 +153,6 @@ static void reset_node_lowmem_managed_pages(pg_data_t *pgdat)
        z->managed_pages = 0;
}

/**
 * free_all_bootmem_node - release a node's free pages to the buddy allocator
 * @pgdat: node to be released
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
    register_page_bootmem_info_node(pgdat);
    reset_node_lowmem_managed_pages(pgdat);

    /* free_low_memory_core_early(MAX_NUMNODES) will be called later */
    return 0;
}

/**
 * free_all_bootmem - release free pages to the buddy allocator
 *

@ -406,6 +391,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
    return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
}

void * __init __alloc_bootmem_low_nopanic(unsigned long size,
                                          unsigned long align,
                                          unsigned long goal)
{
    return ___alloc_bootmem_nopanic(size, align, goal,
                                    ARCH_LOW_ADDRESS_LIMIT);
}

/**
 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
 * @pgdat: node to allocate from