parisc: Use per-pagetable spinlock
PA-RISC uses a global spinlock to protect pagetable updates in the TLB
fault handlers. When multiple cores are taking TLB faults
simultaneously, the cache line containing the spinlock becomes a
bottleneck. This patch embeds the spinlock in the top-level page
directory, so that every process has its own lock. It improves
performance by 30% when doing parallel compilations.

At least on the N class systems, only one PxTLB inter-processor
broadcast can be active at any one time on the Merced bus. If a Merced
bus is found, this patch serializes the TLB flushes with the
pa_tlb_flush_lock spinlock.

v1: Initial patch by Mikulas
v2: Added Merced detection by Helge
v3: Revised TLB serialization by Dave & Helge

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
commit b37d1c1898 (parent d19a12906e)
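
The heart of the change is visible in the pgd_spinlock() hunk below:
pgd_alloc() grows the page-directory allocation by one order (hence the
"+ 1" on PGD_ALLOC_ORDER) and puts the lock at a fixed byte offset past
the directory, so the lock address can be derived from the pgd pointer
alone, both in C and in the assembly TLB handlers (which recover the pgd
from %cr25 with mfctl and add the offset with addil). A minimal userspace
sketch of the same layout trick follows; demo_pgd_alloc() and pgd_lock()
are hypothetical names and the pthread spinlock is an illustrative
stand-in, not the kernel API:

/*
 * Standalone sketch (NOT kernel code): co-locate a per-process lock
 * with the top-level page directory by over-allocating the pgd region.
 * The kernel equivalents are pgd_alloc() and pgd_spinlock().
 */
#include <stdlib.h>
#include <pthread.h>

#define PAGE_SIZE       4096UL
#define PGD_ORDER       1                   /* pages used by the pgd itself */
#define PGD_ALLOC_ORDER (PGD_ORDER + 1)     /* twice that; upper half holds the lock */

/* Same arithmetic as the patch's pgd_spinlock(): the lock sits at a fixed
 * byte offset past the pgd, so a pgd pointer alone locates its lock. */
static pthread_spinlock_t *pgd_lock(void *pgd)
{
        return (pthread_spinlock_t *)((char *)pgd +
                        (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
}

static void *demo_pgd_alloc(void)
{
        /* One allocation covers the pgd pages plus the page holding the lock. */
        void *pgd = aligned_alloc(PAGE_SIZE, PAGE_SIZE << PGD_ALLOC_ORDER);

        if (pgd)
                pthread_spin_init(pgd_lock(pgd), PTHREAD_PROCESS_PRIVATE);
        return pgd;
}

int main(void)
{
        void *pgd = demo_pgd_alloc();

        if (!pgd)
                return 1;
        pthread_spin_lock(pgd_lock(pgd));   /* would protect a PTE update */
        /* ... set_pte() and the TLB purge would happen here ... */
        pthread_spin_unlock(pgd_lock(pgd));
        free(pgd);
        return 0;
}

The other half of the patch is the conditional flush serialization:
purge_tlb_start()/purge_tlb_end() take the global pa_tlb_flush_lock only
when pa_serialize_tlb_flushes was set by the Merced-bus probe, and
otherwise merely disable local interrupts. A rough userspace analogue of
that pattern, under the same caveats (a mutex stands in for the spinlock,
and the interrupt-masking half is reduced to a comment):

/* Standalone sketch (NOT kernel code) of conditional serialization. */
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t tlb_flush_lock = PTHREAD_MUTEX_INITIALIZER;
static bool serialize_tlb_flushes;      /* set once at boot from the bus probe */

static void purge_start(void)
{
        if (serialize_tlb_flushes)
                pthread_mutex_lock(&tlb_flush_lock);
        /* else: the kernel macro only does local_irq_save() */
}

static void purge_end(void)
{
        if (serialize_tlb_flushes)
                pthread_mutex_unlock(&tlb_flush_lock);
        /* else: local_irq_restore() */
}

int main(void)
{
        serialize_tlb_flushes = true;   /* pretend machine_has_merced_bus() hit */
        purge_start();
        puts("at most one PxTLB-style broadcast in flight");
        purge_end();
        return 0;
}
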
@@ -120,7 +120,7 @@ extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
 extern void init_parisc_bus(void);
 extern struct device *hwpath_to_device(struct hardware_path *modpath);
 extern void device_to_hwpath(struct device *dev, struct hardware_path *path);
-
+extern int machine_has_merced_bus(void);
 
 /* inventory.c: */
 extern void do_memory_inventory(void);
@@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
                __pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
 #endif
        }
+       spin_lock_init(pgd_spinlock(actual_pgd));
        return actual_pgd;
 }
 
@@ -17,7 +17,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_tlb_lock;
+static inline spinlock_t *pgd_spinlock(pgd_t *);
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -34,16 +34,46 @@ extern spinlock_t pa_tlb_lock;
  */
 #define kern_addr_valid(addr) (1)
 
-/* Purge data and instruction TLB entries. Must be called holding
- * the pa_tlb_lock. The TLB purge instructions are slow on SMP
- * machines since the purge must be broadcast to all CPUs.
+/* This is for the serialization of PxTLB broadcasts. At least on the N class
+ * systems, only one PxTLB inter processor broadcast can be active at any one
+ * time on the Merced bus.
+
+ * PTE updates are protected by locks in the PMD.
+ */
+extern spinlock_t pa_tlb_flush_lock;
+extern spinlock_t pa_swapper_pg_lock;
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+extern int pa_serialize_tlb_flushes;
+#else
+#define pa_serialize_tlb_flushes       (0)
+#endif
+
+#define purge_tlb_start(flags) do { \
+       if (pa_serialize_tlb_flushes)   \
+               spin_lock_irqsave(&pa_tlb_flush_lock, flags); \
+       else \
+               local_irq_save(flags);  \
+       } while (0)
+#define purge_tlb_end(flags)   do { \
+       if (pa_serialize_tlb_flushes)   \
+               spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \
+       else \
+               local_irq_restore(flags);       \
+       } while (0)
+
+/* Purge data and instruction TLB entries. The TLB purge instructions
+ * are slow on SMP machines since the purge must be broadcast to all CPUs.
  */
 
 static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 {
+       unsigned long flags;
+
+       purge_tlb_start(flags);
        mtsp(mm->context, 1);
        pdtlb(addr);
        pitlb(addr);
+       purge_tlb_end(flags);
 }
 
 /* Certain architectures need to do special things when PTEs
@@ -59,11 +89,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
        do {                                            \
                pte_t old_pte;                          \
                unsigned long flags;                    \
-               spin_lock_irqsave(&pa_tlb_lock, flags); \
+               spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
                old_pte = *ptep;                        \
                set_pte(ptep, pteval);                  \
                purge_tlb_entries(mm, addr);            \
-               spin_unlock_irqrestore(&pa_tlb_lock, flags);    \
+               spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
        } while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -88,10 +118,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define PGD_ORDER      1 /* Number of pages per pgd */
 #define PMD_ORDER      1 /* Number of pages per pmd */
-#define PGD_ALLOC_ORDER        2 /* first pgd contains pmd */
+#define PGD_ALLOC_ORDER        (2 + 1) /* first pgd contains pmd */
 #else
 #define PGD_ORDER      1 /* Number of pages per pgd */
-#define PGD_ALLOC_ORDER        PGD_ORDER
+#define PGD_ALLOC_ORDER        (PGD_ORDER + 1)
 #endif
 
 /* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -459,6 +489,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
 #define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)  ((pte_t) { (x).val })
 
+
+static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
+{
+       if (unlikely(pgd == swapper_pg_dir))
+               return &pa_swapper_pg_lock;
+       return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
+}
+
+
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
        pte_t pte;
@@ -467,15 +506,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
        if (!pte_young(*ptep))
                return 0;
 
-       spin_lock_irqsave(&pa_tlb_lock, flags);
+       spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
        pte = *ptep;
        if (!pte_young(pte)) {
-               spin_unlock_irqrestore(&pa_tlb_lock, flags);
+               spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
                return 0;
        }
        set_pte(ptep, pte_mkold(pte));
        purge_tlb_entries(vma->vm_mm, addr);
-       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+       spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
        return 1;
 }
 
@@ -485,11 +524,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
        pte_t old_pte;
        unsigned long flags;
 
-       spin_lock_irqsave(&pa_tlb_lock, flags);
+       spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
        old_pte = *ptep;
        set_pte(ptep, __pte(0));
        purge_tlb_entries(mm, addr);
-       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+       spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
 
        return old_pte;
 }
@@ -497,10 +536,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
        unsigned long flags;
-       spin_lock_irqsave(&pa_tlb_lock, flags);
+       spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
        set_pte(ptep, pte_wrprotect(*ptep));
        purge_tlb_entries(mm, addr);
-       spin_unlock_irqrestore(&pa_tlb_lock, flags);
+       spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
 }
 
 #define pte_same(A,B)  (pte_val(A) == pte_val(B))
@@ -8,21 +8,6 @@
 #include <linux/sched.h>
 #include <asm/mmu_context.h>
 
-
-/* This is for the serialisation of PxTLB broadcasts. At least on the
- * N class systems, only one PxTLB inter processor broadcast can be
- * active at any one time on the Merced bus. This tlb purge
- * synchronisation is fairly lightweight and harmless so we activate
- * it on all systems not just the N class.
-
- * It is also used to ensure PTE updates are atomic and consistent
- * with the TLB.
- */
-extern spinlock_t pa_tlb_lock;
-
-#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_lock, flags)
-#define purge_tlb_end(flags)   spin_unlock_irqrestore(&pa_tlb_lock, flags)
-
 extern void flush_tlb_all(void);
 extern void flush_tlb_all_local(void *);
 
@@ -79,13 +64,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
 {
-       unsigned long flags, sid;
-
-       sid = vma->vm_mm->context;
-       purge_tlb_start(flags);
-       mtsp(sid, 1);
-       pdtlb(addr);
-       pitlb(addr);
-       purge_tlb_end(flags);
+       purge_tlb_entries(vma->vm_mm, addr);
 }
 #endif
@@ -40,12 +40,19 @@ void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
 
-/* On some machines (e.g. ones with the Merced bus), there can be
+/* On some machines (i.e., ones with the Merced bus), there can be
  * only a single PxTLB broadcast at a time; this must be guaranteed
- * by software. We put a spinlock around all TLB flushes to
- * ensure this.
+ * by software. We need a spinlock around all TLB flushes to ensure
+ * this.
  */
-DEFINE_SPINLOCK(pa_tlb_lock);
+DEFINE_SPINLOCK(pa_tlb_flush_lock);
+
+/* Swapper page setup lock. */
+DEFINE_SPINLOCK(pa_swapper_pg_lock);
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+int pa_serialize_tlb_flushes __read_mostly;
+#endif
 
 struct pdc_cache_info cache_info __read_mostly;
 #ifndef CONFIG_PA20
@@ -38,6 +38,7 @@
 #include <asm/io.h>
 #include <asm/pdc.h>
 #include <asm/parisc-device.h>
+#include <asm/ropes.h>
 
 /* See comments in include/asm-parisc/pci.h */
 const struct dma_map_ops *hppa_dma_ops __read_mostly;
@@ -257,6 +258,30 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
        return ret ? d.dev : NULL;
 }
 
+static int __init is_IKE_device(struct device *dev, void *data)
+{
+       struct parisc_device *pdev = to_parisc_device(dev);
+
+       if (!check_dev(dev))
+               return 0;
+       if (pdev->id.hw_type != HPHW_BCPORT)
+               return 0;
+       if (IS_IKE(pdev) ||
+               (pdev->id.hversion == REO_MERCED_PORT) ||
+               (pdev->id.hversion == REOG_MERCED_PORT)) {
+               return 1;
+       }
+       return 0;
+}
+
+int __init machine_has_merced_bus(void)
+{
+       int ret;
+
+       ret = for_each_padev(is_IKE_device, NULL);
+       return ret ? 1 : 0;
+}
+
 /**
  * find_pa_parent_type - Find a parent of a specific type
  * @dev: The device to start searching from
@@ -50,12 +50,8 @@
 
        .import         pa_tlb_lock,data
        .macro  load_pa_tlb_lock reg
-#if __PA_LDCW_ALIGNMENT > 4
-       load32  PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
-       depi    0,31,__PA_LDCW_ALIGN_ORDER, \reg
-#else
-       load32  PA(pa_tlb_lock), \reg
-#endif
+       mfctl   %cr25,\reg
+       addil   L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg
        .endm
 
        /* space_to_prot macro creates a prot id from a space id */
|
@ -31,6 +31,7 @@
|
|||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/parisc-device.h>
|
#include <asm/parisc-device.h>
|
||||||
|
#include <asm/tlbflush.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Debug options
|
** Debug options
|
||||||
@@ -638,4 +639,10 @@ void __init do_device_inventory(void)
        }
        printk(KERN_INFO "Found devices:\n");
        print_parisc_devices();
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+       pa_serialize_tlb_flushes = machine_has_merced_bus();
+       if (pa_serialize_tlb_flushes)
+               pr_info("Merced bus found: Enable PxTLB serialization.\n");
+#endif
 }
@@ -343,6 +343,12 @@ static int __init parisc_init(void)
                        boot_cpu_data.cpu_hz / 1000000,
                        boot_cpu_data.cpu_hz % 1000000 );
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+       /* Don't serialize TLB flushes if we run on one CPU only. */
+       if (num_online_cpus() == 1)
+               pa_serialize_tlb_flushes = 0;
+#endif
+
        apply_alternatives_all();
        parisc_setup_cache_timing();
 