diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 08ca264459bd..3b90051c0e05 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -373,7 +373,7 @@ config CPU_FEROCEON
 	select CPU_CACHE_VIVT
 	select CPU_CP15_MMU
 	select CPU_COPY_FEROCEON if MMU
-	select CPU_TLB_V4WBI if MMU
+	select CPU_TLB_FEROCEON if MMU
 
 config CPU_FEROCEON_OLD_ID
 	bool "Accept early Feroceon cores with an ARM926 ID"
@@ -551,6 +551,11 @@ config CPU_TLB_V4WBI
 	  ARM Architecture Version 4 TLB with writeback cache and invalidate
 	  instruction cache entry.
 
+config CPU_TLB_FEROCEON
+	bool
+	help
+	  Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+
 config CPU_TLB_V6
 	bool
 
@@ -709,6 +714,14 @@ config OUTER_CACHE
 	bool
 	default n
 
+config CACHE_FEROCEON_L2
+	bool "Enable the Feroceon L2 cache controller"
+	depends on FOOBAR
+	default y
+	select OUTER_CACHE
+	help
+	  This option enables the Feroceon L2 cache controller.
+
 config CACHE_L2X0
 	bool "Enable the L2x0 outer cache controller"
 	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 32b2d2d213a6..f64b92557b11 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
 obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o
 obj-$(CONFIG_CPU_TLB_V4WBI)	+= tlb-v4wbi.o
+obj-$(CONFIG_CPU_TLB_FEROCEON)	+= tlb-v4wbi.o	# reuse v4wbi TLB functions
 obj-$(CONFIG_CPU_TLB_V6)	+= tlb-v6.o
 obj-$(CONFIG_CPU_TLB_V7)	+= tlb-v7.o
 
@@ -73,4 +74,5 @@ obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
 obj-$(CONFIG_CPU_V7)		+= proc-v7.o
 
+obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
 obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
new file mode 100644
index 000000000000..20eec4ba173f
--- /dev/null
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -0,0 +1,332 @@
+/*
+ * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * References:
+ * - Unified Layer 2 Cache for Feroceon CPU Cores,
+ *   Document ID MV-S104858-00, Rev. A, October 23 2007.
+ */
+
+#include <linux/init.h>
+#include <asm/cacheflush.h>
+#include <asm/plat-orion/cache-feroceon-l2.h>
+
+
+/*
+ * Low-level cache maintenance operations.
+ *
+ * As well as the regular 'clean/invalidate/flush L2 cache line by
+ * MVA' instructions, the Feroceon L2 cache controller also features
+ * 'clean/invalidate L2 range by MVA' operations.
+ *
+ * Cache range operations are initiated by writing the start and
+ * end addresses to successive cp15 registers, and process every
+ * cache line whose first byte address lies in the inclusive range
+ * [start:end].
+ *
+ * The cache range operations stall the CPU pipeline until completion.
+ *
+ * The range operations require two successive cp15 writes, in
+ * between which we don't want to be preempted.
+ */
+static inline void l2_clean_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
+}
+
+static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	/*
+	 * Make sure 'start' and 'end' reference the same page, as
+	 * L2 is PIPT and range operations only do a TLB lookup on
+	 * the start address.
+	 */
+	BUG_ON((start ^ end) & ~(PAGE_SIZE - 1));
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c9, 4" : : "r" (start));
+	__asm__("mcr p15, 1, %0, c15, c9, 5" : : "r" (end));
+	raw_local_irq_restore(flags);
+}
+
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
+{
+	l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end));
+}
+
+static inline void l2_clean_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	/*
+	 * Make sure 'start' and 'end' reference the same page, as
+	 * L2 is PIPT and range operations only do a TLB lookup on
+	 * the start address.
+	 */
+	BUG_ON((start ^ end) & ~(PAGE_SIZE - 1));
+
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c11, 4" : : "r" (start));
+	__asm__("mcr p15, 1, %0, c15, c11, 5" : : "r" (end));
+	raw_local_irq_restore(flags);
+}
+
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
+{
+	l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end));
+}
+
+
+/*
+ * Linux primitives.
+ *
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+#define CACHE_LINE_SIZE		32
+#define MAX_RANGE_SIZE		1024
+
+static int l2_wt_override;
+
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+	unsigned long range_end;
+
+	BUG_ON(start & (CACHE_LINE_SIZE - 1));
+	BUG_ON(end & (CACHE_LINE_SIZE - 1));
+
+	/*
+	 * Try to process all cache lines between 'start' and 'end'.
+	 */
+	range_end = end;
+
+	/*
+	 * Limit the number of cache lines processed at once,
+	 * since cache range operations stall the CPU pipeline
+	 * until completion.
+	 */
+	if (range_end > start + MAX_RANGE_SIZE)
+		range_end = start + MAX_RANGE_SIZE;
+
+	/*
+	 * Cache range operations can't straddle a page boundary.
+	 */
+	if (range_end > (start | (PAGE_SIZE - 1)) + 1)
+		range_end = (start | (PAGE_SIZE - 1)) + 1;
+
+	return range_end;
+}
+
+static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * Clean and invalidate partial first cache line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (end & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+		end &= ~(CACHE_LINE_SIZE - 1);
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * If both ends fell inside the same cache line, the rounding
+	 * above leaves 'start' greater than 'end'; that line has
+	 * already been cleaned and invalidated, so nothing is left.
+	 */
+	while (start < end) {
+		unsigned long range_end = calc_range_end(start, end);
+		l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+		start = range_end;
+	}
+
+	dsb();
+}
+
+static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 is forced to WT, the L2 will always be clean and we
+	 * don't need to do anything here.
+	 */
+	if (!l2_wt_override) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+		while (start != end) {
+			unsigned long range_end = calc_range_end(start, end);
+			l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+			start = range_end;
+		}
+	}
+
+	dsb();
+}
+
+static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	while (start != end) {
+		unsigned long range_end = calc_range_end(start, end);
+		if (!l2_wt_override)
+			l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+		l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+		start = range_end;
+	}
+
+	dsb();
+}
+
+
+/*
+ * Routines to disable and re-enable the D-cache and I-cache at run
+ * time.  These are necessary because the L2 cache can only be enabled
+ * or disabled while the L1 Dcache and Icache are both disabled.
+ */
+static void __init invalidate_and_disable_dcache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (cr & CR_C) {
+		unsigned long flags;
+
+		raw_local_irq_save(flags);
+		flush_cache_all();
+		set_cr(cr & ~CR_C);
+		raw_local_irq_restore(flags);
+	}
+}
+
+static void __init enable_dcache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (!(cr & CR_C))
+		set_cr(cr | CR_C);
+}
+
+static void __init __invalidate_icache(void)
+{
+	/*
+	 * MCR copies an ARM register to the coprocessor, so the
+	 * operand is an input to the asm, not an output.
+	 */
+	__asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0\n" : : "r" (0));
+}
+
+static void __init invalidate_and_disable_icache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (cr & CR_I) {
+		set_cr(cr & ~CR_I);
+		__invalidate_icache();
+	}
+}
+
+static void __init enable_icache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (!(cr & CR_I))
+		set_cr(cr | CR_I);
+}
+
+static inline u32 read_extra_features(void)
+{
+	u32 u;
+
+	/*
+	 * Volatile, so that a read issued after write_extra_features()
+	 * cannot be merged with an earlier read of the same register.
+	 */
+	__asm__ __volatile__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u));
+
+	return u;
+}
+
+static inline void write_extra_features(u32 u)
+{
+	__asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
+}
+
+static void __init disable_l2_prefetch(void)
+{
+	u32 u;
+
+	/*
+	 * Read the CPU Extra Features register and verify that the
+	 * Disable L2 Prefetch bit is set.
+	 */
+	u = read_extra_features();
+	if (!(u & 0x01000000)) {
+		printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n");
+		write_extra_features(u | 0x01000000);
+	}
+}
+
+static void __init enable_l2(void)
+{
+	u32 u;
+
+	u = read_extra_features();
+	if (!(u & 0x00400000)) {
+		printk(KERN_INFO "Feroceon L2: Enabling L2\n");
+
+		invalidate_and_disable_dcache();
+		invalidate_and_disable_icache();
+		write_extra_features(u | 0x00400000);
+		enable_icache();
+		enable_dcache();
+	}
+}
+
+/*
+ * Hook the Feroceon L2 range operations into outer_cache and turn
+ * the L2 cache on if it is not enabled yet.  A nonzero
+ * '__l2_wt_override' makes the clean and flush range operations
+ * skip the L2 clean step.
+ */
+void __init feroceon_l2_init(int __l2_wt_override)
+{
+	l2_wt_override = __l2_wt_override;
+
+	disable_l2_prefetch();
+
+	outer_cache.inv_range = feroceon_l2_inv_range;
+	outer_cache.clean_range = feroceon_l2_clean_range;
+	outer_cache.flush_range = feroceon_l2_flush_range;
+
+	enable_l2();
+
+	printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n",
+			 l2_wt_override ? ", in WT override mode" : "");
+}
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index c279652a98fd..cecf96ee07d5 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -79,6 +79,13 @@ ENTRY(cpu_feroceon_proc_fin)
 	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
 	msr	cpsr_c, ip
 	bl	feroceon_flush_kern_cache_all
+
+#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+	mov	r0, #0
+	mcr	p15, 1, r0, c15, c9, 0		@ clean L2
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+
 	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
 	bic	r0, r0, #0x1000			@ ...i............
 	bic	r0, r0, #0x000e			@ ............wca.
@@ -382,10 +389,20 @@ ENTRY(feroceon_range_cache_fns)
 
 	.align	5
 ENTRY(cpu_feroceon_dcache_clean_area)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+	mov	r2, r0
+	mov	r3, r1
+#endif
 1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, #CACHE_DLINESIZE
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
+#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+1:	mcr	p15, 1, r2, c15, c9, 1		@ clean L2 entry
+	add	r2, r2, #CACHE_DLINESIZE
+	subs	r3, r3, #CACHE_DLINESIZE
+	bhi	1b
+#endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 	mov	pc, lr
 
@@ -449,6 +466,9 @@ ENTRY(cpu_feroceon_set_pte_ext)
 	str	r2, [r0]			@ hardware version
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+	mcr	p15, 1, r0, c15, c9, 1		@ clean L2 entry
+#endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
 #endif
 	mov	pc, lr
diff --git a/include/asm-arm/plat-orion/cache-feroceon-l2.h b/include/asm-arm/plat-orion/cache-feroceon-l2.h
new file mode 100644
index 000000000000..ba4e016d3ec0
--- /dev/null
+++ b/include/asm-arm/plat-orion/cache-feroceon-l2.h
@@ -0,0 +1,15 @@
+/*
+ * include/asm-arm/plat-orion/cache-feroceon-l2.h
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/*
+ * Initialise Feroceon L2 cache support.  Pass a nonzero
+ * l2_wt_override when L2 is forced to write-through.
+ */
+extern void feroceon_l2_init(int l2_wt_override);
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 8c6bc1bb9d1a..909656c747ef 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -39,6 +39,7 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 
+#define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
 
@@ -51,6 +52,7 @@
  *	  v4    - ARMv4 without write buffer
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
+ *	  fr    - Feroceon (v4wbi with non-outer-cacheable page table walks)
  *	  v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
  */
 #undef _TLB
@@ -103,6 +105,23 @@
 # define v4wbi_always_flags	(-1UL)
 #endif
 
+#define fr_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \
+			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
+			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)
+
+#ifdef CONFIG_CPU_TLB_FEROCEON
+# define fr_possible_flags	fr_tlb_flags
+# define fr_always_flags	fr_tlb_flags
+# ifdef _TLB
+#  define MULTI_TLB 1
+# else
+#  define _TLB v4wbi
+# endif
+#else
+# define fr_possible_flags	0
+# define fr_always_flags	(-1UL)
+#endif
+
 #define v4wb_tlb_flags	(TLB_WB | TLB_DCLEAN | \
 			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
 			 TLB_V4_D_PAGE)
@@ -245,12 +264,14 @@ extern struct cpu_tlb_fns cpu_tlb;
 #define possible_tlb_flags	(v3_possible_flags | \
 				 v4_possible_flags | \
 				 v4wbi_possible_flags | \
+				 fr_possible_flags | \
 				 v4wb_possible_flags | \
 				 v6wbi_possible_flags)
 
 #define always_tlb_flags	(v3_always_flags & \
 				 v4_always_flags & \
 				 v4wbi_always_flags & \
+				 fr_always_flags & \
 				 v4wb_always_flags & \
 				 v6wbi_always_flags)
 
@@ -417,6 +438,11 @@ static inline void flush_pmd_entry(pmd_t *pmd)
 	if (tlb_flag(TLB_DCLEAN))
 		asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd"
 			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
+
 	if (tlb_flag(TLB_WB))
 		dsb();
 }
@@ -428,6 +454,10 @@ static inline void clean_pmd_entry(pmd_t *pmd)
 	if (tlb_flag(TLB_DCLEAN))
 		asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pmd"
 			: : "r" (pmd) : "cc");
+
+	if (tlb_flag(TLB_L2CLEAN_FR))
+		asm("mcr p15, 1, %0, c15, c9, 1 @ L2 flush_pmd"
+			: : "r" (pmd) : "cc");
 }
 
 #undef tlb_flag