From 71196a26d85be2aae2e587dfd3c3190a651133c6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 27 Mar 2013 02:20:45 +0000 Subject: [PATCH 1/8] sparc:remove unused declaration smp_boot_cpus() smp_boot_cpus() was replaced smp_prepare_cpus() long ago, and it no longer needed, so delete it. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index b73da3c5f10a..68503d3d1b9e 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -46,7 +46,6 @@ void sun4m_init_smp(void); void sun4d_init_smp(void); void smp_callin(void); -void smp_boot_cpus(void); void smp_store_cpu_info(int); void smp_resched_interrupt(void); From 54df2db36c93bbb8c757f172d97c577040de6abb Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:43 +0000 Subject: [PATCH 2/8] sparc/srmmu: clear trailing edge of bitmap properly srmmu_nocache_bitmap is cleared by bit_map_init(). But bit_map_init() attempts to clear by memset(), so it can't clear the trailing edge of bitmap properly on big-endian architecture if the number of bits is not a multiple of BITS_PER_LONG. Actually, the number of bits in srmmu_nocache_bitmap is not always a multiple of BITS_PER_LONG. It is calculated as below: bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT; srmmu_nocache_size is decided proportionally by the amount of system RAM and it is rounded to a multiple of PAGE_SIZE. SRMMU_NOCACHE_BITMAP_SHIFT is defined as (PAGE_SHIFT - 4). So it can only be said that bitmap_bits is a multiple of 16. This fixes the problem by using bitmap_clear() instead of memset() in bit_map_init() and this also uses BITS_TO_LONGS() to calculate correct size at bitmap allocation time. Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/lib/bitext.c | 6 +----- arch/sparc/mm/srmmu.c | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 48d00e72ce15..8ec4e9c0251a 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -119,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len) void bit_map_init(struct bit_map *t, unsigned long *map, int size) { - - if ((size & 07) != 0) - BUG(); - memset(map, 0, size>>3); - + bitmap_zero(map, size); memset(t, 0, sizeof *t); spin_lock_init(&t->lock); t->map = map; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c38bb72e3e80..036c2797dece 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -280,7 +280,9 @@ static void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); From 9a0ac1b6af111f2fa0f2a39e9bbbebb74da7d26a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:44 +0000 Subject: [PATCH 3/8] sparc/iommu: fix typo s/265KB/256KB/ IOMMU_NPTES is 64K PTEs, so the size is 256KB (= 64K * sizeof(iopte_t)) Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0f4f7191fbba..28f96f27c768 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -34,7 +34,7 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ /* srmmu.c */ From bf3aece8af91eabea2cfb2cbe528abc394695163 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 30 Mar 2013 00:28:26 +0000 Subject: [PATCH 4/8] sparc:cleanup unused code in smp_32.h After genirq and generic clockevent support at sparc32, smp4m_irq_rotate(), prof_multiplier() and prof_counter() are no longer used and should be removed. Find more info from commit 6baa9b20 & 62f08283. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 68503d3d1b9e..3c8917f054de 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -36,7 +36,6 @@ typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void cpu_panic(void); -extern void smp4m_irq_rotate(int cpu); /* * General functions that each host system must provide. @@ -106,9 +105,6 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) -#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier -#define prof_counter(__cpu) cpu_data(__cpu).counter - void smp_setup_cpu_possible_map(void); #endif /* !(__ASSEMBLY__) */ From a2d34dd41212032c03e77bc30c2023725def841a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 30 Mar 2013 11:44:22 +0000 Subject: [PATCH 5/8] sparc: use generic headers Use "generic-y" to add generic headers where possible Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 4 ++++ arch/sparc/include/asm/cputime.h | 6 ------ arch/sparc/include/asm/emergency-restart.h | 6 ------ arch/sparc/include/asm/mutex.h | 9 --------- arch/sparc/include/asm/serial.h | 6 ------ 5 files changed, 4 insertions(+), 27 deletions(-) delete mode 100644 arch/sparc/include/asm/cputime.h delete mode 100644 arch/sparc/include/asm/emergency-restart.h delete mode 100644 arch/sparc/include/asm/mutex.h delete mode 100644 arch/sparc/include/asm/serial.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e26d430ce2fd..f73884bb286a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,11 +2,15 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h generic-y += local64.h +generic-y += mutex.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += serial.h generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/asm/cputime.h b/arch/sparc/include/asm/cputime.h deleted file mode 100644 index 1a642b81e019..000000000000 --- a/arch/sparc/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_CPUTIME_H -#define __SPARC_CPUTIME_H - -#include - -#endif /* __SPARC_CPUTIME_H */ diff --git a/arch/sparc/include/asm/emergency-restart.h b/arch/sparc/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/sparc/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sparc/include/asm/mutex.h b/arch/sparc/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/sparc/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/sparc/include/asm/serial.h b/arch/sparc/include/asm/serial.h deleted file mode 100644 index f90d61c28059..000000000000 --- a/arch/sparc/include/asm/serial.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_SERIAL_H -#define __SPARC_SERIAL_H - -#define BASE_BAUD ( 1843200 / 16 ) - -#endif /* __SPARC_SERIAL_H */ From 6e51f857303d02770d5f3ff8f0436487168d0230 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 30 Mar 2013 11:44:24 +0000 Subject: [PATCH 6/8] bbc_i2c: fix section mismatch warning Fix following warning: WARNING: drivers/sbus/char/bbc.o(.text+0x674): Section mismatch in reference from the function bbc_i2c_probe() to the function .init.text:T.463() The function bbc_i2c_probe() references the function __init T.463(). This is often because bbc_i2c_probe lacks a __init annotation or the annotation of T.463 is wrong. bbc_i2c_probe() referenced the inlined attach_one_i2c(). As probe may be called after init drop __init annotations on all functions used by bbc_i2c_probe() The warning was seen with a sparc64 defconfig build Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- drivers/sbus/char/bbc_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 1a9d1e3ce64c..c1441ed282eb 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -282,7 +282,7 @@ static irqreturn_t bbc_i2c_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init reset_one_i2c(struct bbc_i2c_bus *bp) +static void reset_one_i2c(struct bbc_i2c_bus *bp) { writeb(I2C_PCF_PIN, bp->i2c_control_regs + 0x0); writeb(bp->own, bp->i2c_control_regs + 0x1); @@ -291,7 +291,7 @@ static void __init reset_one_i2c(struct bbc_i2c_bus *bp) writeb(I2C_PCF_IDLE, bp->i2c_control_regs + 0x0); } -static struct bbc_i2c_bus * __init attach_one_i2c(struct platform_device *op, int index) +static struct bbc_i2c_bus * attach_one_i2c(struct platform_device *op, int index) { struct bbc_i2c_bus *bp; struct device_node *dp; From cbf1ef6b3345d2cc7e62407eec6a6f72a8b1346f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 31 Mar 2013 07:01:47 +0000 Subject: [PATCH 7/8] sparc: use asm-generic version of types.h In sparc headers we use the following pattern: #if defined(__sparc__) && defined(__arch64__) sparc64 specific stuff #else sparc32 specific stuff #endif In types.h this pattern was not followed and here we only checked for __sparc__ for no good reason. It was a left-over from long time ago. I checked other architectures - and most of them do not have any such checks. And all the recently merged versions uses the asm-generic version. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/Kbuild | 1 - arch/sparc/include/uapi/asm/types.h | 17 ----------------- 3 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/sparc/include/uapi/asm/types.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index f73884bb286a..ff18e3cfb6b1 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -13,4 +13,5 @@ generic-y += local.h generic-y += module.h generic-y += serial.h generic-y += trace_clock.h +generic-y += types.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index ce175aff71b7..b5843ee09fb5 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -44,7 +44,6 @@ header-y += swab.h header-y += termbits.h header-y += termios.h header-y += traps.h -header-y += types.h header-y += uctx.h header-y += unistd.h header-y += utrap.h diff --git a/arch/sparc/include/uapi/asm/types.h b/arch/sparc/include/uapi/asm/types.h deleted file mode 100644 index 383d156cde9c..000000000000 --- a/arch/sparc/include/uapi/asm/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _SPARC_TYPES_H -#define _SPARC_TYPES_H -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ - -#if defined(__sparc__) - -#include - -#endif /* defined(__sparc__) */ - -#endif /* defined(_SPARC_TYPES_H) */ From f36391d2790d04993f48da6a45810033a2cdf847 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Apr 2013 17:26:26 -0400 Subject: [PATCH 8/8] sparc64: Fix race in TLB batch processing. As reported by Dave Kleikamp, when we emit cross calls to do batched TLB flush processing we have a race because we do not synchronize on the sibling cpus completing the cross call. So meanwhile the TLB batch can be reset (tb->tlb_nr set to zero, etc.) and either flushes are missed or flushes will flush the wrong addresses. Fix this by using generic infrastructure to synchonize on the completion of the cross call. This first required getting the flush_tlb_pending() call out from switch_to() which operates with locks held and interrupts disabled. The problem is that smp_call_function_many() cannot be invoked with IRQs disabled and this is explicitly checked for with WARN_ON_ONCE(). We get the batch processing outside of locked IRQ disabled sections by using some ideas from the powerpc port. Namely, we only batch inside of arch_{enter,leave}_lazy_mmu_mode() calls. If we're not in such a region, we flush TLBs synchronously. 1) Get rid of xcall_flush_tlb_pending and per-cpu type implementations. 2) Do TLB batch cross calls instead via: smp_call_function_many() tlb_pending_func() __flush_tlb_pending() 3) Batch only in lazy mmu sequences: a) Add 'active' member to struct tlb_batch b) Define __HAVE_ARCH_ENTER_LAZY_MMU_MODE c) Set 'active' in arch_enter_lazy_mmu_mode() d) Run batch and clear 'active' in arch_leave_lazy_mmu_mode() e) Check 'active' in tlb_batch_add_one() and do a synchronous flush if it's clear. 4) Add infrastructure for synchronous TLB page flushes. a) Implement __flush_tlb_page and per-cpu variants, patch as needed. b) Likewise for xcall_flush_tlb_page. c) Implement smp_flush_tlb_page() to invoke the cross-call. d) Wire up global_flush_tlb_page() to the right routine based upon CONFIG_SMP 5) It turns out that singleton batches are very common, 2 out of every 3 batch flushes have only a single entry in them. The batch flush waiting is very expensive, both because of the poll on sibling cpu completeion, as well as because passing the tlb batch pointer to the sibling cpus invokes a shared memory dereference. Therefore, in flush_tlb_pending(), if there is only one entry in the batch perform a completely asynchronous global_flush_tlb_page() instead. Reported-by: Dave Kleikamp Signed-off-by: David S. Miller Acked-by: Dave Kleikamp --- arch/sparc/include/asm/pgtable_64.h | 1 + arch/sparc/include/asm/switch_to_64.h | 3 +- arch/sparc/include/asm/tlbflush_64.h | 37 ++++++-- arch/sparc/kernel/smp_64.c | 43 ++++++++-- arch/sparc/mm/tlb.c | 38 +++++++- arch/sparc/mm/tsb.c | 57 ++++++++---- arch/sparc/mm/ultra.S | 119 ++++++++++++++++++++------ 7 files changed, 242 insertions(+), 56 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 08fcce90316b..7619f2f792af 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -915,6 +915,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot); } +#include #include /* We provide our own get_unmapped_area to cope with VA holes and diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index cad36f56fa03..c7de3323819c 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -18,8 +18,7 @@ do { \ * and 2 stores in this critical code path. -DaveM */ #define switch_to(prev, next, last) \ -do { flush_tlb_pending(); \ - save_and_clear_fpu(); \ +do { save_and_clear_fpu(); \ /* If you are tempted to conditionalize the following */ \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index 2ef463494153..f0d6a9700f4c 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -11,24 +11,40 @@ struct tlb_batch { struct mm_struct *mm; unsigned long tlb_nr; + unsigned long active; unsigned long vaddrs[TLB_BATCH_NR]; }; extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); extern void flush_tsb_user(struct tlb_batch *tb); +extern void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); /* TLB flush operations. */ -extern void flush_tlb_pending(void); +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} -#define flush_tlb_range(vma,start,end) \ - do { (void)(start); flush_tlb_pending(); } while (0) -#define flush_tlb_page(vma,addr) flush_tlb_pending() -#define flush_tlb_mm(mm) flush_tlb_pending() +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE + +extern void flush_tlb_pending(void); +extern void arch_enter_lazy_mmu_mode(void); +extern void arch_leave_lazy_mmu_mode(void); +#define arch_flush_lazy_mmu_mode() do {} while (0) /* Local cpu only. */ extern void __flush_tlb_all(void); - +extern void __flush_tlb_page(unsigned long context, unsigned long vaddr); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP @@ -38,15 +54,24 @@ do { flush_tsb_kernel_range(start,end); \ __flush_tlb_kernel_range(start,end); \ } while (0) +static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); +} + #else /* CONFIG_SMP */ extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); #define flush_tlb_kernel_range(start, end) \ do { flush_tsb_kernel_range(start,end); \ smp_flush_tlb_kernel_range(start, end); \ } while (0) +#define global_flush_tlb_page(mm, vaddr) \ + smp_flush_tlb_page(mm, vaddr) + #endif /* ! CONFIG_SMP */ #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 537eb66abd06..ca64d2a86ec0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -849,7 +849,7 @@ void smp_tsb_sync(struct mm_struct *mm) } extern unsigned long xcall_flush_tlb_mm; -extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_kernel_range; extern unsigned long xcall_fetch_glob_regs; extern unsigned long xcall_fetch_glob_pmu; @@ -1074,19 +1074,52 @@ local_flush_and_out: put_cpu(); } +struct tlb_pending_info { + unsigned long ctx; + unsigned long nr; + unsigned long *vaddrs; +}; + +static void tlb_pending_func(void *info) +{ + struct tlb_pending_info *t = info; + + __flush_tlb_pending(t->ctx, t->nr, t->vaddrs); +} + void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) { u32 ctx = CTX_HWBITS(mm->context); + struct tlb_pending_info info; + int cpu = get_cpu(); + + info.ctx = ctx; + info.nr = nr; + info.vaddrs = vaddrs; + + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) + cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); + else + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); + + __flush_tlb_pending(ctx, nr, vaddrs); + + put_cpu(); +} + +void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long context = CTX_HWBITS(mm->context); int cpu = get_cpu(); if (mm == current->mm && atomic_read(&mm->mm_users) == 1) cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); else - smp_cross_call_masked(&xcall_flush_tlb_pending, - ctx, nr, (unsigned long) vaddrs, + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, mm_cpumask(mm)); - - __flush_tlb_pending(ctx, nr, vaddrs); + __flush_tlb_page(context, vaddr); put_cpu(); } diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index ba6ae7ffdc2c..272aa4f7657e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -24,11 +24,17 @@ static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); void flush_tlb_pending(void) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; - if (tb->tlb_nr) { - flush_tsb_user(tb); + if (!tb->tlb_nr) + goto out; - if (CTX_VALID(tb->mm->context)) { + flush_tsb_user(tb); + + if (CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { #ifdef CONFIG_SMP smp_flush_tlb_pending(tb->mm, tb->tlb_nr, &tb->vaddrs[0]); @@ -37,12 +43,30 @@ void flush_tlb_pending(void) tb->tlb_nr, &tb->vaddrs[0]); #endif } - tb->tlb_nr = 0; } + tb->tlb_nr = 0; + +out: put_cpu_var(tlb_batch); } +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, bool exec) { @@ -60,6 +84,12 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, nr = 0; } + if (!tb->active) { + global_flush_tlb_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr); + return; + } + if (nr == 0) tb->mm = mm; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 428982b9becf..2cc3bce5ee91 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -7,11 +7,10 @@ #include #include #include -#include -#include -#include #include +#include #include +#include #include extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; @@ -46,23 +45,27 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) } } +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) +{ + unsigned long tag, ent, hash; + + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); +} + static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, unsigned long tsb, unsigned long nentries) { unsigned long i; - for (i = 0; i < tb->tlb_nr; i++) { - unsigned long v = tb->vaddrs[i]; - unsigned long tag, ent, hash; - - v &= ~0x1UL; - - hash = tsb_hash(v, hash_shift, nentries); - ent = tsb + (hash * sizeof(struct tsb)); - tag = (v >> 22UL); - - tsb_flush(ent, tag); - } + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); } void flush_tsb_user(struct tlb_batch *tb) @@ -90,6 +93,30 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index f8e13d421fcb..432aa0cb1b38 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -52,6 +52,33 @@ __flush_tlb_mm: /* 18 insns */ nop nop + .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + .align 32 .globl __flush_tlb_pending __flush_tlb_pending: /* 26 insns */ @@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + __cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 @@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns */ retl nop +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 @@ -339,6 +405,13 @@ cheetah_patch_cachetlbops: call tlb_patch_one mov 19, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 @@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop - .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=context, %g1=nr, %g7=vaddrs[] */ - sllx %g1, 3, %g1 + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */ or %g5, %g4, %g5 mov PRIMARY_CONTEXT, %g4 stxa %g5, [%g4] ASI_DMMU -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %g5 - andcc %g5, 0x1, %g0 + andcc %g1, 0x1, %g0 be,pn %icc, 2f - - andn %g5, 0x1, %g5 + andn %g1, 0x1, %g5 stxa %g0, [%g5] ASI_IMMU_DEMAP 2: stxa %g0, [%g5] ASI_DMMU_DEMAP membar #Sync - brnz,pt %g1, 1b - nop stxa %g2, [%g4] ASI_DMMU retry nop + nop .globl xcall_flush_tlb_kernel_range xcall_flush_tlb_kernel_range: /* 25 insns */ @@ -656,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ membar #Sync retry - .globl __hypervisor_xcall_flush_tlb_pending -__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ - sllx %g1, 3, %g1 + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 mov %o2, %g4 -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g1, %o0 /* ARG0: virtual address */ mov %g5, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ srlx %o0, PAGE_SHIFT, %o0 @@ -673,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /* 21 insns */ mov HV_MMU_UNMAP_ADDR_TRAP, %g6 brnz,a,pn %o0, __hypervisor_tlb_xcall_error mov %o0, %g5 - brnz,pt %g1, 1b - nop mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 @@ -757,6 +821,13 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 10, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 @@ -788,12 +859,12 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 21, %o2 - sethi %hi(xcall_flush_tlb_pending), %o0 - or %o0, %lo(xcall_flush_tlb_pending), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 21, %o2 + mov 17, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0