#ifndef _ASM_X86_PGTABLE_32_H
#define _ASM_X86_PGTABLE_32_H

#include <asm/pgtable_32_types.h>

/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
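
/*
 * Illustrative sketch: because the mid level is folded, generic
 * multi-level walk code still compiles here, with pud_offset() and
 * (on !PAE) pmd_offset() reducing to no-ops. Assuming a valid 'mm'
 * and 'addr', a walk down to a pte looks roughly like:
 *
 *	pgd_t *pgd = pgd_offset(mm, addr);
 *	pud_t *pud = pud_offset(pgd, addr);
 *	pmd_t *pmd = pmd_offset(pud, addr);
 *	pte_t *pte = pte_offset_map(pmd, addr);
 *	...
 *	pte_unmap(pte);
 */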
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct mm_struct;
struct vm_area_struct;

extern pgd_t swapper_pg_dir[1024];
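
/*
 * trampoline_pg_dir has the same layout as swapper_pg_dir but keeps
 * the low 1:1 mappings. Secondary CPUs boot from it and switch to
 * swapper_pg_dir early in start_secondary(), so swapper_pg_dir never
 * needs the low mappings that trigger AMD erratum 383.
 */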
extern pgd_t trampoline_pg_dir[1024];

static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);

extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);

/*
 * Define this if things work differently on an i386 and an i486:
 * it will (on an i486) warn about kernel memory accesses that are
 * done without an 'access_ok(VERIFY_WRITE,..)'
 */
#undef TEST_ACCESS_OK

#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
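
/*
 * For reference (summary; the 2/3-level headers are authoritative):
 * without PAE a virtual address splits 10/10/12 bits
 * (pgd/pte/page offset) over 32-bit entries; with PAE it splits
 * 2/9/9/12 bits (pgd/pmd/pte/page offset) over 64-bit entries.
 */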

#if defined(CONFIG_HIGHPTE)
#define __KM_PTE					\
	(in_nmi() ? KM_NMI_PTE :			\
	 in_irq() ? KM_IRQ_PTE :			\
	 KM_PTE0)
#define pte_offset_map(dir, address)				\
	((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) +	\
	 pte_index((address)))
#define pte_offset_map_nested(dir, address)			\
	((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) +	\
	 pte_index((address)))
#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
#else
#define pte_offset_map(dir, address)				\
	((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
#endif
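
/*
 * Usage sketch: with CONFIG_HIGHPTE the pte page may itself live in
 * highmem, so pte_offset_map() claims a kmap_atomic() slot and every
 * call must be paired with pte_unmap(), with no sleeping in between:
 *
 *	pte_t *pte = pte_offset_map(pmd, addr);
 *	pte_t entry = *pte;
 *	pte_unmap(pte);
 *
 * The _nested variants use the second slot (KM_PTE1) so that two pte
 * pages can be mapped at once, as in src/dst copy walks.
 */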

/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr)		\
do {						\
	pte_clear(&init_mm, (vaddr), (ptep));	\
	__flush_tlb_one((vaddr));		\
} while (0)
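
/*
 * Usage sketch, assuming 'ptep' and 'vaddr' describe an established
 * kernel mapping:
 *
 *	kpte_clear_flush(ptep, vaddr);
 *
 * This clears the entry in init_mm's page tables and invalidates the
 * stale TLB entry for that single address on the local CPU.
 */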

/*
 * The i386 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
#endif /* !__ASSEMBLY__ */

/*
 * kern_addr_valid() is (1) for FLATMEM and (0) for
 * SPARSEMEM and DISCONTIGMEM
 */
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr)	(1)
#else
#define kern_addr_valid(kaddr)	(0)
#endif

#endif /* _ASM_X86_PGTABLE_32_H */