mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-15 10:24:44 +08:00
2f569afd9c
Background: I've implemented 1K/2K page tables for s390. These sub-page page tables are required to properly support the s390 virtualization instruction with KVM. The SIE instruction requires that the page tables have 256 page table entries (pte) followed by 256 page status table entries (pgste). The pgstes are only required if the process is using the SIE instruction. The pgstes are updated by the hardware and by the hypervisor for a number of reasons, one of them is dirty and reference bit tracking. To avoid wasting memory the standard pte table allocation should return 1K/2K (31/64 bit) and 2K/4K if the process is using SIE. Problem: Page size on s390 is 4K, page table size is 1K or 2K. That means the s390 version for pte_alloc_one cannot return a pointer to a struct page. Trouble is that with the CONFIG_HIGHPTE feature on x86 pte_alloc_one cannot return a pointer to a pte either, since that would require more than 32 bit for the return value of pte_alloc_one (and the pte * would not be accessible since its not kmapped). Solution: The only solution I found to this dilemma is a new typedef: a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced with a later patch. For everybody else it will be a (struct page *). The additional problem with the initialization of the ptl lock and the NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor and a destructor pgtable_page_dtor. The page table allocation and free functions need to call these two whenever a page table page is allocated or freed. pmd_populate will get a pgtable_t instead of a struct page pointer. To get the pgtable_t back from a pmd entry that has been installed with pmd_populate a new function pmd_pgtable is added. It replaces the pmd_page call in free_pte_range and apply_to_pte_range. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
389 lines
9.0 KiB
C
389 lines
9.0 KiB
C
/*
|
|
* This file contains the routines setting up the linux page tables.
|
|
* -- paulus
|
|
*
|
|
* Derived from arch/ppc/mm/init.c:
|
|
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
|
|
*
|
|
* Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
|
|
* and Cort Dougan (PReP) (cort@cs.nmt.edu)
|
|
* Copyright (C) 1996 Paul Mackerras
|
|
*
|
|
* Derived from "arch/i386/mm/init.c"
|
|
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/init.h>
|
|
#include <linux/highmem.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/io.h>
|
|
|
|
#include "mmu_decl.h"
|
|
|
|
unsigned long ioremap_base;
|
|
unsigned long ioremap_bot;
|
|
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
|
|
|
|
#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
|
|
#define HAVE_BATS 1
|
|
#endif
|
|
|
|
#if defined(CONFIG_FSL_BOOKE)
|
|
#define HAVE_TLBCAM 1
|
|
#endif
|
|
|
|
extern char etext[], _stext[];
|
|
|
|
#ifdef CONFIG_SMP
|
|
extern void hash_page_sync(void);
|
|
#endif
|
|
|
|
#ifdef HAVE_BATS
|
|
extern unsigned long v_mapped_by_bats(unsigned long va);
|
|
extern unsigned long p_mapped_by_bats(unsigned long pa);
|
|
void setbat(int index, unsigned long virt, unsigned long phys,
|
|
unsigned int size, int flags);
|
|
|
|
#else /* !HAVE_BATS */
|
|
#define v_mapped_by_bats(x) (0UL)
|
|
#define p_mapped_by_bats(x) (0UL)
|
|
#endif /* HAVE_BATS */
|
|
|
|
#ifdef HAVE_TLBCAM
|
|
extern unsigned int tlbcam_index;
|
|
extern unsigned long v_mapped_by_tlbcam(unsigned long va);
|
|
extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
|
|
#else /* !HAVE_TLBCAM */
|
|
#define v_mapped_by_tlbcam(x) (0UL)
|
|
#define p_mapped_by_tlbcam(x) (0UL)
|
|
#endif /* HAVE_TLBCAM */
|
|
|
|
#ifdef CONFIG_PTE_64BIT
|
|
/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
|
|
#define PGDIR_ORDER 1
|
|
#else
|
|
#define PGDIR_ORDER 0
|
|
#endif
|
|
|
|
pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
{
|
|
pgd_t *ret;
|
|
|
|
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
|
|
return ret;
|
|
}
|
|
|
|
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|
{
|
|
free_pages((unsigned long)pgd, PGDIR_ORDER);
|
|
}
|
|
|
|
__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
pte_t *pte;
|
|
extern int mem_init_done;
|
|
extern void *early_get_page(void);
|
|
|
|
if (mem_init_done) {
|
|
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
|
|
} else {
|
|
pte = (pte_t *)early_get_page();
|
|
if (pte)
|
|
clear_page(pte);
|
|
}
|
|
return pte;
|
|
}
|
|
|
|
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
struct page *ptepage;
|
|
|
|
#ifdef CONFIG_HIGHPTE
|
|
gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO;
|
|
#else
|
|
gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
|
|
#endif
|
|
|
|
ptepage = alloc_pages(flags, 0);
|
|
if (!ptepage)
|
|
return NULL;
|
|
pgtable_page_ctor(ptepage);
|
|
return ptepage;
|
|
}
|
|
|
|
void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
hash_page_sync();
|
|
#endif
|
|
free_page((unsigned long)pte);
|
|
}
|
|
|
|
void pte_free(struct mm_struct *mm, pgtable_t ptepage)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
hash_page_sync();
|
|
#endif
|
|
pgtable_page_dtor(ptepage);
|
|
__free_page(ptepage);
|
|
}
|
|
|
|
void __iomem *
|
|
ioremap(phys_addr_t addr, unsigned long size)
|
|
{
|
|
return __ioremap(addr, size, _PAGE_NO_CACHE);
|
|
}
|
|
EXPORT_SYMBOL(ioremap);
|
|
|
|
void __iomem *
|
|
ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
|
|
{
|
|
return __ioremap(addr, size, flags);
|
|
}
|
|
EXPORT_SYMBOL(ioremap_flags);
|
|
|
|
void __iomem *
|
|
__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
|
|
{
|
|
unsigned long v, i;
|
|
phys_addr_t p;
|
|
int err;
|
|
|
|
/*
|
|
* Choose an address to map it to.
|
|
* Once the vmalloc system is running, we use it.
|
|
* Before then, we use space going down from ioremap_base
|
|
* (ioremap_bot records where we're up to).
|
|
*/
|
|
p = addr & PAGE_MASK;
|
|
size = PAGE_ALIGN(addr + size) - p;
|
|
|
|
/*
|
|
* If the address lies within the first 16 MB, assume it's in ISA
|
|
* memory space
|
|
*/
|
|
if (p < 16*1024*1024)
|
|
p += _ISA_MEM_BASE;
|
|
|
|
/*
|
|
* Don't allow anybody to remap normal RAM that we're using.
|
|
* mem_init() sets high_memory so only do the check after that.
|
|
*/
|
|
if (mem_init_done && (p < virt_to_phys(high_memory))) {
|
|
printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
|
|
(unsigned long long)p, __builtin_return_address(0));
|
|
return NULL;
|
|
}
|
|
|
|
if (size == 0)
|
|
return NULL;
|
|
|
|
/*
|
|
* Is it already mapped? Perhaps overlapped by a previous
|
|
* BAT mapping. If the whole area is mapped then we're done,
|
|
* otherwise remap it since we want to keep the virt addrs for
|
|
* each request contiguous.
|
|
*
|
|
* We make the assumption here that if the bottom and top
|
|
* of the range we want are mapped then it's mapped to the
|
|
* same virt address (and this is contiguous).
|
|
* -- Cort
|
|
*/
|
|
if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
|
|
goto out;
|
|
|
|
if ((v = p_mapped_by_tlbcam(p)))
|
|
goto out;
|
|
|
|
if (mem_init_done) {
|
|
struct vm_struct *area;
|
|
area = get_vm_area(size, VM_IOREMAP);
|
|
if (area == 0)
|
|
return NULL;
|
|
v = (unsigned long) area->addr;
|
|
} else {
|
|
v = (ioremap_bot -= size);
|
|
}
|
|
|
|
if ((flags & _PAGE_PRESENT) == 0)
|
|
flags |= _PAGE_KERNEL;
|
|
if (flags & _PAGE_NO_CACHE)
|
|
flags |= _PAGE_GUARDED;
|
|
|
|
/*
|
|
* Should check if it is a candidate for a BAT mapping
|
|
*/
|
|
|
|
err = 0;
|
|
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
|
|
err = map_page(v+i, p+i, flags);
|
|
if (err) {
|
|
if (mem_init_done)
|
|
vunmap((void *)v);
|
|
return NULL;
|
|
}
|
|
|
|
out:
|
|
return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
|
|
}
|
|
EXPORT_SYMBOL(__ioremap);
|
|
|
|
void iounmap(volatile void __iomem *addr)
|
|
{
|
|
/*
|
|
* If mapped by BATs then there is nothing to do.
|
|
* Calling vfree() generates a benign warning.
|
|
*/
|
|
if (v_mapped_by_bats((unsigned long)addr)) return;
|
|
|
|
if (addr > high_memory && (unsigned long) addr < ioremap_bot)
|
|
vunmap((void *) (PAGE_MASK & (unsigned long)addr));
|
|
}
|
|
EXPORT_SYMBOL(iounmap);
|
|
|
|
int map_page(unsigned long va, phys_addr_t pa, int flags)
|
|
{
|
|
pmd_t *pd;
|
|
pte_t *pg;
|
|
int err = -ENOMEM;
|
|
|
|
/* Use upper 10 bits of VA to index the first level map */
|
|
pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
|
|
/* Use middle 10 bits of VA to index the second-level map */
|
|
pg = pte_alloc_kernel(pd, va);
|
|
if (pg != 0) {
|
|
err = 0;
|
|
/* The PTE should never be already set nor present in the
|
|
* hash table
|
|
*/
|
|
BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
|
|
set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
|
|
__pgprot(flags)));
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Map in all of physical memory starting at KERNELBASE.
|
|
*/
|
|
void __init mapin_ram(void)
|
|
{
|
|
unsigned long v, p, s, f;
|
|
int ktext;
|
|
|
|
s = mmu_mapin_ram();
|
|
v = KERNELBASE + s;
|
|
p = PPC_MEMSTART + s;
|
|
for (; s < total_lowmem; s += PAGE_SIZE) {
|
|
ktext = ((char *) v >= _stext && (char *) v < etext);
|
|
f = ktext ?_PAGE_RAM_TEXT : _PAGE_RAM;
|
|
map_page(v, p, f);
|
|
#ifdef CONFIG_PPC_STD_MMU_32
|
|
if (ktext)
|
|
hash_preload(&init_mm, v, 0, 0x300);
|
|
#endif
|
|
v += PAGE_SIZE;
|
|
p += PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
/* Scan the real Linux page tables and return a PTE pointer for
|
|
* a virtual address in a context.
|
|
* Returns true (1) if PTE was found, zero otherwise. The pointer to
|
|
* the PTE pointer is unmodified if PTE is not found.
|
|
*/
|
|
int
|
|
get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
int retval = 0;
|
|
|
|
pgd = pgd_offset(mm, addr & PAGE_MASK);
|
|
if (pgd) {
|
|
pud = pud_offset(pgd, addr & PAGE_MASK);
|
|
if (pud && pud_present(*pud)) {
|
|
pmd = pmd_offset(pud, addr & PAGE_MASK);
|
|
if (pmd_present(*pmd)) {
|
|
pte = pte_offset_map(pmd, addr & PAGE_MASK);
|
|
if (pte) {
|
|
retval = 1;
|
|
*ptep = pte;
|
|
if (pmdp)
|
|
*pmdp = pmd;
|
|
/* XXX caller needs to do pte_unmap, yuck */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return(retval);
|
|
}
|
|
|
|
#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
|
|
static int __change_page_attr(struct page *page, pgprot_t prot)
|
|
{
|
|
pte_t *kpte;
|
|
pmd_t *kpmd;
|
|
unsigned long address;
|
|
|
|
BUG_ON(PageHighMem(page));
|
|
address = (unsigned long)page_address(page);
|
|
|
|
if (v_mapped_by_bats(address) || v_mapped_by_tlbcam(address))
|
|
return 0;
|
|
if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
|
|
return -EINVAL;
|
|
set_pte_at(&init_mm, address, kpte, mk_pte(page, prot));
|
|
wmb();
|
|
flush_HPTE(0, address, pmd_val(*kpmd));
|
|
pte_unmap(kpte);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Change the page attributes of an page in the linear mapping.
|
|
*
|
|
* THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
|
|
*/
|
|
static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
|
|
{
|
|
int i, err = 0;
|
|
unsigned long flags;
|
|
|
|
local_irq_save(flags);
|
|
for (i = 0; i < numpages; i++, page++) {
|
|
err = __change_page_attr(page, prot);
|
|
if (err)
|
|
break;
|
|
}
|
|
local_irq_restore(flags);
|
|
return err;
|
|
}
|
|
|
|
|
|
void kernel_map_pages(struct page *page, int numpages, int enable)
|
|
{
|
|
if (PageHighMem(page))
|
|
return;
|
|
|
|
change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
|
|
}
|
|
#endif /* CONFIG_DEBUG_PAGEALLOC */
|